[Mesa-dev] [PATCH 8/9] intel/blorp: Handle fast-clear directly in blorp_ccs_op
Jason Ekstrand
jason at jlekstrand.net
Tue May 15 22:28:11 UTC 2018
---
src/intel/blorp/blorp_clear.c | 199 +++++++++++++++++++-----------------------
1 file changed, 88 insertions(+), 111 deletions(-)
diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
index 39bc0c6..5625221 100644
--- a/src/intel/blorp/blorp_clear.c
+++ b/src/intel/blorp/blorp_clear.c
@@ -193,104 +193,7 @@ get_fast_clear_rect(const struct isl_device *dev,
/* Only single sampled surfaces need to (and actually can) be resolved. */
if (aux_surf->usage == ISL_SURF_USAGE_CCS_BIT) {
- /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
- * Target(s)", beneath the "Fast Color Clear" bullet (p327):
- *
- * Clear pass must have a clear rectangle that must follow
- * alignment rules in terms of pixels and lines as shown in the
- * table below. Further, the clear-rectangle height and width
- * must be multiple of the following dimensions. If the height
- * and width of the render target being cleared do not meet these
- * requirements, an MCS buffer can be created such that it
- * follows the requirement and covers the RT.
- *
- * The alignment size in the table that follows is related to the
- * alignment size that is baked into the CCS surface format but with X
- * alignment multiplied by 16 and Y alignment multiplied by 32.
- */
- x_align = isl_format_get_layout(aux_surf->format)->bw;
- y_align = isl_format_get_layout(aux_surf->format)->bh;
-
- x_align *= 16;
-
- /* SKL+ line alignment requirement for Y-tiled are half those of the prior
- * generations.
- */
- if (dev->info->gen >= 9)
- y_align *= 16;
- else
- y_align *= 32;
-
- /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
- * Target(s)", beneath the "Fast Color Clear" bullet (p327):
- *
- * In order to optimize the performance MCS buffer (when bound to
- * 1X RT) clear similarly to MCS buffer clear for MSRT case,
- * clear rect is required to be scaled by the following factors
- * in the horizontal and vertical directions:
- *
- * The X and Y scale down factors in the table that follows are each
- * equal to half the alignment value computed above.
- */
- x_scaledown = x_align / 2;
- y_scaledown = y_align / 2;
-
- if (ISL_DEV_IS_HASWELL(dev)) {
- /* The following text was added in the Haswell PRM, "3D Media GPGPU
- * Engine" >> "MCS Buffer for Render Target(s)" >> Table "Color Clear
- * of Non-MultiSampler Render Target Restrictions":
- *
- * "Clear rectangle must be aligned to two times the number of
- * pixels in the table shown below due to 16X16 hashing across the
- * slice."
- *
- * It has persisted in the documentation for all platforms up until
- * Cannonlake and possibly even beyond. However, we believe that it
- * is only needed on Haswell.
- *
- * There are a couple possible explanations for this restriction:
- *
- * 1) If you assume that the hardware is writing to the CCS as
- * bytes, then the x/y_align computed above gives you an alignment
- * in the CCS of 8x8 bytes and, if 16x16 is needed for hashing, we
- * need to multiply by 2.
- *
- * 2) Haswell is a bit unique in that it's CCS tiling does not line
- * up with Y-tiling on a cache-line granularity. Instead, it has
- * an extra bit of swizzling in bit 9. Also, bit 6 swizzling
- * applies to the CCS on Haswell. This means that Haswell CTS
- * does not match on a cache-line granularity but it does match on
- * a 2x2 cache line granularity.
- *
- * Clearly, the first explanation seems to follow documentation the
- * best but they may be related. In any case, empirical evidence
- * seems to confirm that it is, indeed required on Haswell.
- *
- * On Broadwell things get a bit stickier. Broadwell adds support
- * for mip-mapped CCS with an alignment in the CCS of 256x128. For a
- * 32bpb main surface, the above computation will yield a x/y_align
- * of 128x128 for a Y-tiled main surface and 256x64 for X-tiled. In
- * either case, if we double the alignment, we will get an alignment
- * bigger than horizontal and vertical alignment of the CCS and fast
- * clears of one LOD may leak into others.
- *
- * Starting with Skylake, the image alignment for the CCS is only
- * 128x64 which is exactly the x/h_align computed above if the main
- * surface has a 32bpb format. Also, the "Render Target Resolve"
- * page in the bspec (not the PRM) says, "The Resolve Rectangle size
- * is same as Clear Rectangle size from SKL+". The x/y_align
- * computed above (without doubling) match the resolve rectangle
- * calculation perfectly.
- *
- * Finally, to confirm all this, a full test run was performed on
- * Feb. 9, 2018 with this doubling removed and the only platform
- * which seemed to be affected was Haswell. The run consisted of
- * piglit, dEQP, the Vulkan CTS 1.0.2, the OpenGL 4.5 CTS, and the
- * OpenGL ES 3.2 CTS.
- */
- x_align *= 2;
- y_align *= 2;
- }
+ unreachable("This function only supports MCS fast-clear");
} else {
assert(aux_surf->usage == ISL_SURF_USAGE_MCS_BIT);
@@ -826,14 +729,8 @@ blorp_ccs_op(struct blorp_batch *batch,
enum isl_format format,
enum isl_aux_op ccs_op)
{
- if (ccs_op == ISL_AUX_OP_FAST_CLEAR) {
- blorp_fast_clear(batch, surf, format, level, start_layer, num_layers,
- 0, 0,
- minify(surf->surf->logical_level0_px.w, level),
- minify(surf->surf->logical_level0_px.h, level));
- return;
- } else if (ISL_DEV_GEN(batch->blorp->isl_dev) < 10 &&
- ccs_op == ISL_AUX_OP_AMBIGUATE) {
+ if (ISL_DEV_GEN(batch->blorp->isl_dev) < 10 &&
+ ccs_op == ISL_AUX_OP_AMBIGUATE) {
/* Prior to Cannonlake, the ambiguate is not available as a hardware
* operation. Instead, we have to fake it by carefully binding the CCS
* as a render target and clearing it to 0. We leave that complicated
@@ -863,6 +760,11 @@ blorp_ccs_op(struct blorp_batch *batch,
isl_format_get_layout(params.dst.aux_surf.format);
assert(aux_fmtl->txc == ISL_TXC_CCS);
+ /* The PRM Sections entitled "Fast Color Clear" and "Render Target Resolve"
+ * contain tables for the scale down factor for fast clear and resolve
+ * rectangles. The values in those tables are easily computed from the
+ * CCS element block size.
+ */
unsigned x_scaledown, y_scaledown;
if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 9) {
x_scaledown = aux_fmtl->bw * 8;
@@ -871,16 +773,91 @@ blorp_ccs_op(struct blorp_batch *batch,
x_scaledown = aux_fmtl->bw * 8;
y_scaledown = aux_fmtl->bh * 16;
} else {
- x_scaledown = aux_fmtl->bw / 2;
- y_scaledown = aux_fmtl->bh / 2;
+ assert(ISL_DEV_GEN(batch->blorp->isl_dev) == 7);
+ if (ccs_op == ISL_AUX_OP_FAST_CLEAR) {
+ x_scaledown = aux_fmtl->bw * 8;
+ y_scaledown = aux_fmtl->bh * 16;
+ } else {
+ x_scaledown = aux_fmtl->bw / 2;
+ y_scaledown = aux_fmtl->bh / 2;
+ }
}
+
+ unsigned x_align = x_scaledown;
+ unsigned y_align = y_scaledown;
+ if (ccs_op == ISL_AUX_OP_FAST_CLEAR) {
+ /* The PRM Section entitled "Fast Color Clear" contains tables for the
+ * scaledown and alignment factors for fast clear operations. In all
+ * cases, the alignment table is 2x the scaledown table.
+ */
+ x_align *= 2;
+ y_align *= 2;
+
+ if (ISL_DEV_IS_HASWELL(batch->blorp->isl_dev)) {
+ /* The following text was added in the Haswell PRM, "3D Media GPGPU
+ * Engine" >> "MCS Buffer for Render Target(s)" >> Table "Color Clear
+ * of Non-MultiSampler Render Target Restrictions":
+ *
+ * "Clear rectangle must be aligned to two times the number of
+ * pixels in the table shown below due to 16X16 hashing across the
+ * slice."
+ *
+ * It has persisted in the documentation for all platforms up until
+ * Cannonlake and possibly even beyond. However, we believe that it
+ * is only needed on Haswell.
+ *
+ * There are a couple possible explanations for this restriction:
+ *
+ * 1) If you assume that the hardware is writing to the CCS as
+ * bytes, then the x/y_align computed above gives you an alignment
+ * in the CCS of 8x8 bytes and, if 16x16 is needed for hashing, we
+ * need to multiply by 2.
+ *
+ * 2) Haswell is a bit unique in that its CCS tiling does not line
+ * up with Y-tiling on a cache-line granularity. Instead, it has
+ * an extra bit of swizzling in bit 9. Also, bit 6 swizzling
+ * applies to the CCS on Haswell. This means that Haswell CCS
+ * does not match on a cache-line granularity but it does match on
+ * a 2x2 cache line granularity.
+ *
+ * Clearly, the first explanation seems to follow documentation the
+ * best but they may be related. In any case, empirical evidence
+ * seems to confirm that it is, indeed required on Haswell.
+ *
+ * On Broadwell things get a bit stickier. Broadwell adds support
+ * for mip-mapped CCS with an alignment in the CCS of 256x128. For a
+ * 32bpb main surface, the above computation will yield a x/y_align
+ * of 128x128 for a Y-tiled main surface and 256x64 for X-tiled. In
+ * either case, if we double the alignment, we will get an alignment
+ * bigger than horizontal and vertical alignment of the CCS and fast
+ * clears of one LOD may leak into others.
+ *
+ * Starting with Skylake, the image alignment for the CCS is only
+ * 128x64 which is exactly the x/y_align computed above if the main
+ * surface has a 32bpb format. Also, the "Render Target Resolve"
+ * page in the bspec (not the PRM) says, "The Resolve Rectangle size
+ * is same as Clear Rectangle size from SKL+". The x/y_align
+ * computed above (without doubling) match the resolve rectangle
+ * calculation perfectly.
+ *
+ * Finally, to confirm all this, a full test run was performed on
+ * Feb. 9, 2018 with this doubling removed and the only platform
+ * which seemed to be affected was Haswell. The run consisted of
+ * piglit, dEQP, the Vulkan CTS 1.0.2, the OpenGL 4.5 CTS, and the
+ * OpenGL ES 3.2 CTS.
+ */
+ x_align *= 2;
+ y_align *= 2;
+ }
+ }
+
params.x0 = params.y0 = 0;
params.x1 = minify(params.dst.aux_surf.logical_level0_px.width, level);
params.y1 = minify(params.dst.aux_surf.logical_level0_px.height, level);
- params.x1 = ALIGN(params.x1, x_scaledown) / x_scaledown;
- params.y1 = ALIGN(params.y1, y_scaledown) / y_scaledown;
+ params.x1 = ALIGN(params.x1, x_align) / x_scaledown;
+ params.y1 = ALIGN(params.y1, y_align) / y_scaledown;
- assert(ccs_op != ISL_AUX_OP_FAST_CLEAR);
+ memset(&params.wm_inputs.clear_color, 0xff, 4*sizeof(float));
params.fast_clear_op = ccs_op;
params.num_layers = num_layers;
--
2.5.0.400.gff86faf
More information about the mesa-dev
mailing list