<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Fri, Jan 19, 2018 at 3:47 PM, Jason Ekstrand <span dir="ltr"><<a href="mailto:jason@jlekstrand.net" target="_blank">jason@jlekstrand.net</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">This pass performs an "ambiguate" operation on a CCS-compressed surface<br>
by manually writing zeros into the CCS. On gen8+, ISL gives us a fairly<br>
detailed notion of how the CCS is laid out so this is fairly simple to<br>
do. On gen7, the CCS tiling is quite crazy but that isn't an issue<br>
because we can only do CCS on single-slice images so we can just blast<br>
over the entire CCS buffer if we want to.<br>
---<br>
src/intel/blorp/blorp.h | 5 ++<br>
src/intel/blorp/blorp_clear.c | 149 ++++++++++++++++++++++++++++++<wbr>++++++++++++<br>
2 files changed, 154 insertions(+)<br>
<br>
diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h<br>
index a1dd571..478a9af 100644<br>
--- a/src/intel/blorp/blorp.h<br>
+++ b/src/intel/blorp/blorp.h<br>
@@ -204,6 +204,11 @@ blorp_ccs_resolve(struct blorp_batch *batch,<br>
enum blorp_fast_clear_op resolve_op);<br>
<br>
void<br>
+blorp_ccs_ambiguate(struct blorp_batch *batch,<br>
+ struct blorp_surf *surf,<br>
+ uint32_t level, uint32_t layer);<br>
+<br>
+void<br>
blorp_mcs_partial_resolve(<wbr>struct blorp_batch *batch,<br>
struct blorp_surf *surf,<br>
enum isl_format format,<br>
diff --git a/src/intel/blorp/blorp_clear.<wbr>c b/src/intel/blorp/blorp_clear.<wbr>c<br>
index 8e7bc9f..fa2abd9 100644<br>
--- a/src/intel/blorp/blorp_clear.<wbr>c<br>
+++ b/src/intel/blorp/blorp_clear.<wbr>c<br>
@@ -881,3 +881,152 @@ blorp_mcs_partial_resolve(<wbr>struct blorp_batch *batch,<br>
<br>
batch->blorp->exec(batch, &params);<br>
}<br>
+<br>
+/** Clear a CCS to the "uncompressed" state<br>
+ *<br>
+ * This pass is the CCS equivalent of a "HiZ resolve". It sets the CCS values<br>
+ * for a given layer/level of a surface to 0x0 which is the "uncompressed"<br>
+ * state which tells the sampler to go look at the main surface.<br>
+ */<br>
+void<br>
+blorp_ccs_ambiguate(struct blorp_batch *batch,<br>
+ struct blorp_surf *surf,<br>
+ uint32_t level, uint32_t layer)<br>
+{<br>
+ struct blorp_params params;<br>
+ blorp_params_init(&params);<br>
+<br>
+ assert(ISL_DEV_GEN(batch-><wbr>blorp->isl_dev) >= 7);<br>
+<br>
+ const struct isl_format_layout *aux_fmtl =<br>
+ isl_format_get_layout(surf-><wbr>aux_surf->format);<br>
+ assert(aux_fmtl->txc == ISL_TXC_CCS);<br>
+<br>
+ params.dst = (struct brw_blorp_surface_info) {<br>
+ .enabled = true,<br>
+ .addr = surf->aux_addr,<br>
+ .view = {<br>
+ .usage = ISL_SURF_USAGE_RENDER_TARGET_<wbr>BIT,<br>
+ .format = ISL_FORMAT_R32G32B32A32_UINT,<br>
+ .base_level = 0,<br>
+ .base_array_layer = 0,<br>
+ .levels = 1,<br>
+ .array_len = 1,<br>
+ .swizzle = ISL_SWIZZLE_IDENTITY,<br>
+ },<br>
+ };<br>
+<br>
+ uint32_t z = 0;<br>
+ if (surf->surf->dim == ISL_SURF_DIM_3D) {<br>
+ z = layer;<br>
+ layer = 0;<br>
+ }<br>
+<br>
+ uint32_t offset_B, x_offset_el, y_offset_el;<br>
+ isl_surf_get_image_offset_el(<wbr>surf->aux_surf, level, layer, z,<br>
+ &x_offset_el, &y_offset_el);<br>
+ isl_tiling_get_intratile_<wbr>offset_el(surf->aux_surf-><wbr>tiling, aux_fmtl->bpb,<br>
+ surf->aux_surf->row_pitch,<br>
+ x_offset_el, y_offset_el,<br>
+ &offset_B, &x_offset_el, &y_offset_el);<br>
+ params.dst.addr.offset += offset_B;<br>
+<br>
+ const uint32_t width_px = minify(surf->surf->logical_<wbr>level0_px.width, level);<br>
+ const uint32_t height_px = minify(surf->surf->logical_<wbr>level0_px.height, level);<br>
+ const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);<br>
+ const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh);<br>
+<br>
+ struct isl_tile_info ccs_tile_info;<br>
+ isl_surf_get_tile_info(surf-><wbr>aux_surf, &ccs_tile_info);<br>
+<br>
+ /* We're going to map it as a regular RGBA32_UINT surface. We need to<br>
+ * downscale a good deal. We start by computing the area on the CCS to<br>
+ * clear in units of Y-tiled cache lines.<br>
+ */<br>
+ uint32_t x_offset_y_cl, y_offset_y_cl, width_y_cl, height_y_cl;<br>
+ if (ISL_DEV_GEN(batch->blorp-><wbr>isl_dev) >= 8) {<br>
+ /* From the Sky Lake PRM Vol. 12 in the section on planes:<br>
+ *<br>
+ * "The Color Control Surface (CCS) contains the compression status<br>
+ * of the cache-line pairs. The compression state of the cache-line<br>
+ * pair is specified by 2 bits in the CCS. Each CCS cache-line<br>
+ * represents an area on the main surface of 16x16 sets of 128 byte<br>
+ * Y-tiled cache-line-pairs. CCS is always Y tiled."<br>
+ *<br>
+ * Each 2-bit surface element in the CCS corresponds to a single<br>
+ * cache-line pair in the main surface. This means that 16x16 el block<br>
+ * in the CCS maps to a Y-tiled cache line. Fortunately, CCS layouts<br>
+ * are calculated with a very large alignment so we can round up to a<br>
+ * whole cache line without worrying about overdraw.<br>
+ */<br>
+<br>
+ /* On Broadwell and above, a CCS tile is the same as a Y tile when<br>
+ * viewed at the cache-line granularity. Fortunately, the horizontal<br>
+ * and vertical alignment requirements of the CCS are such that we can<br>
+ * align to an entire cache line without worrying about crossing over<br>
+ * from one LOD to another.<br>
+ */<br>
+ const uint32_t scale_x = ccs_tile_info.logical_extent_<wbr>el.w / 8;<br>
+ const uint32_t scale_y = ccs_tile_info.logical_extent_<wbr>el.h / 8;<br>
+ assert(surf->aux_surf->image_<wbr>alignment_el.w % scale_x == 0);<br>
+ assert(surf->aux_surf->image_<wbr>alignment_el.h % scale_y == 0);<br>
+<br>
+ assert(x_offset_el % scale_x == 0 && y_offset_el % scale_y == 0);<br>
+ x_offset_y_cl = x_offset_el / scale_x;<br>
+ y_offset_y_cl = y_offset_el / scale_y;<br>
+ width_y_cl = DIV_ROUND_UP(width_el, scale_x);<br>
+ height_y_cl = DIV_ROUND_UP(height_el, scale_y);<br>
+ } else {<br>
+ /* On gen7, the CCS tiling is not so nice. However, there we are<br>
+ * guaranteed that we only have a single level and slice so we don't<br>
+ * have to worry about it and can just align to a whole tile.<br>
+ */<br>
+ assert(x_offset_el == 0 && y_offset_el == 0);<br>
+ const uint32_t width_tl =<br>
+ DIV_ROUND_UP(width_el, ccs_tile_info.logical_extent_<wbr>el.w);<br>
+ const uint32_t height_tl =<br>
+ DIV_ROUND_UP(height_el, ccs_tile_info.logical_extent_<wbr>el.h);<br>
+ x_offset_y_cl = 0;<br>
+ y_offset_y_cl = 0;<br>
+ width_y_cl = width_tl * 8;<br>
+ height_y_cl = height_tl * 8;<br>
+ }<br>
+<br>
+ /* We're going to use a RGBA32 format so as to write data as quickly as<br>
+ * possible. A y-tiled cache line will then be 1x4 px.<br>
+ */<br>
+ const uint32_t x_offset_rgba_px = x_offset_y_cl;<br>
+ const uint32_t y_offset_rgba_px = y_offset_y_cl * 4;<br>
+ const uint32_t width_rgba_px = width_y_cl;<br>
+ const uint32_t height_rgba_px = height_y_cl * 4;<br>
+<br>
+ MAYBE_UNUSED bool ok =<br>
+ isl_surf_init(batch->blorp-><wbr>isl_dev, &params.dst.surf,<br>
+ .dim = ISL_SURF_DIM_2D,<br>
+ .format = ISL_FORMAT_R32G32B32A32_UINT,<br>
+ .width = width_rgba_px + x_offset_rgba_px,<br>
+ .height = height_rgba_px + y_offset_rgba_px,<br>
+ .depth = 1,<br>
+ .levels = 1,<br>
+ .array_len = 1,<br>
+ .samples = 1,<br>
+ .row_pitch = surf->aux_surf->row_pitch,<br>
+ .usage = ISL_SURF_USAGE_RENDER_TARGET_<wbr>BIT,<br>
+ .tiling_flags = ISL_TILING_Y0_BIT);<br>
+ assert(ok);<br>
+ assert(offset_B + params.dst.surf.size <= surf->aux_surf->size);<br></blockquote><div><br></div><div>This assertion is bogus. I added it last-minute and didn't run Jenkins between adding it and sending it out.</div><div><br></div><div>The reason it's bogus is because offset_B may offset you horizontally into the image and, when you start at offset_B and go to the right by a full stride worth of pages, you end up outside the CCS. We will never render outside the CCS because our width is much smaller than that. If I tried hard enough, I could come up with a better assertion but I think it's best to just delete it.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+ params.x0 = x_offset_rgba_px;<br>
+ params.y0 = y_offset_rgba_px;<br>
+ params.x1 = x_offset_rgba_px + width_rgba_px;<br>
+ params.y1 = y_offset_rgba_px + height_rgba_px;<br>
+<br>
+ /* A CCS value of 0 means "uncompressed." */<br>
+ memset(&params.wm_inputs.<wbr>clear_color, 0,<br>
+ sizeof(params.wm_inputs.clear_<wbr>color));<br>
+<br>
+ if (!blorp_params_get_clear_<wbr>kernel(batch->blorp, &params, true))<br>
+ return;<br>
+<br>
+ batch->blorp->exec(batch, &params);<br>
+}<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.5.0.400.gff86faf<br>
<br>
</font></span></blockquote></div><br></div></div>