[Mesa-dev] [PATCH 05/30] i965/miptree: Rework aux enabling
Jason Ekstrand
jason at jlekstrand.net
Fri Jun 16 22:41:27 UTC 2017
This commit replaces the complex and confusing set of disable flags with
two fairly straightforward fields which describe the intended auxiliary
surface usage and whether or not the miptree supports fast clears.
Right now, supports_fast_clear can be entirely derived from aux_usage,
but that will not always be the case.
This commit makes functional changes. One of these changes is that it
re-enables multisampled fast clears, which were accidentally disabled in
commit cec30a666930ddb8476a9452a89364a24979ff62 around a year ago. It
should also enable CCS_E for window-system buffers that are Y-tiled.
They will still get a full resolve, like CCS_D, but we will at least get
some of the advantage of compression.
---
src/mesa/drivers/dri/i965/brw_blorp.c | 4 +-
src/mesa/drivers/dri/i965/intel_fbo.c | 2 +-
src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 190 +++++++++++++-------------
src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 43 +++---
4 files changed, 120 insertions(+), 119 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c
index 00092ee..9bd25f0 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -762,7 +762,7 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
if (set_write_disables(irb, ctx->Color.ColorMask[buf], color_write_disable))
can_fast_clear = false;
- if (irb->mt->aux_disable & INTEL_AUX_DISABLE_CCS ||
+ if (!irb->mt->supports_fast_clear ||
!brw_is_color_fast_clear_compatible(brw, irb->mt, &ctx->Color.ClearColor))
can_fast_clear = false;
@@ -785,7 +785,7 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
*/
if (!irb->mt->mcs_buf) {
assert(!intel_miptree_is_lossless_compressed(brw, irb->mt));
- if (!intel_miptree_alloc_ccs(brw, irb->mt, false)) {
+ if (!intel_miptree_alloc_ccs(brw, irb->mt)) {
/* MCS allocation failed--probably this will only happen in
* out-of-memory conditions. But in any case, try to recover
* by falling back to a non-blorp clear technique.
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index ee4aba9..6a64bcb 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -555,7 +555,7 @@ intel_renderbuffer_update_wrapper(struct brw_context *brw,
intel_renderbuffer_set_draw_offset(irb);
- if (intel_miptree_wants_hiz_buffer(brw, mt)) {
+ if (mt->aux_usage == ISL_AUX_USAGE_HIZ && !mt->hiz_buf) {
intel_miptree_alloc_hiz(brw, mt);
if (!mt->hiz_buf)
return false;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 0f6d542..101317f 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -64,7 +64,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
*/
static enum intel_msaa_layout
compute_msaa_layout(struct brw_context *brw, mesa_format format,
- enum intel_aux_disable aux_disable)
+ uint32_t layout_flags)
{
/* Prior to Gen7, all MSAA surfaces used IMS layout. */
if (brw->gen < 7)
@@ -90,7 +90,7 @@ compute_msaa_layout(struct brw_context *brw, mesa_format format,
*/
if (brw->gen == 7 && _mesa_get_format_datatype(format) == GL_INT) {
return INTEL_MSAA_LAYOUT_UMS;
- } else if (aux_disable & INTEL_AUX_DISABLE_MCS) {
+ } else if (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) {
/* We can't use the CMS layout because it uses an aux buffer, the MCS
* buffer. So fallback to UMS, which is identical to CMS without the
* MCS. */
@@ -148,9 +148,6 @@ intel_miptree_supports_ccs(struct brw_context *brw,
if (brw->gen < 7)
return false;
- if (mt->aux_disable & INTEL_AUX_DISABLE_MCS)
- return false;
-
/* This function applies only to non-multisampled render targets. */
if (mt->num_samples > 1)
return false;
@@ -215,6 +212,26 @@ intel_miptree_supports_ccs(struct brw_context *brw,
return true;
}
+static bool
+intel_miptree_supports_hiz(struct brw_context *brw,
+ struct intel_mipmap_tree *mt)
+{
+ if (!brw->has_hiz)
+ return false;
+
+ switch (mt->format) {
+ case MESA_FORMAT_Z_FLOAT32:
+ case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
+ case MESA_FORMAT_Z24_UNORM_X8_UINT:
+ case MESA_FORMAT_Z24_UNORM_S8_UINT:
+ case MESA_FORMAT_Z_UNORM16:
+ return true;
+ default:
+ return false;
+ }
+}
+
+
/* On Gen9 support for color buffer compression was extended to single
* sampled surfaces. This is a helper considering both auxiliary buffer
* type and number of samples telling if the given miptree represents
@@ -320,10 +337,9 @@ intel_miptree_create_layout(struct brw_context *brw,
mt->logical_width0 = width0;
mt->logical_height0 = height0;
mt->logical_depth0 = depth0;
- mt->aux_disable = (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) != 0 ?
- INTEL_AUX_DISABLE_ALL : INTEL_AUX_DISABLE_NONE;
- mt->aux_disable |= INTEL_AUX_DISABLE_CCS;
mt->is_scanout = (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT) != 0;
+ mt->aux_usage = ISL_AUX_USAGE_NONE;
+ mt->supports_fast_clear = false;
mt->aux_state = NULL;
mt->cpp = _mesa_get_format_bytes(format);
mt->num_samples = num_samples;
@@ -337,7 +353,7 @@ intel_miptree_create_layout(struct brw_context *brw,
int depth_multiply = 1;
if (num_samples > 1) {
/* Adjust width/height/depth for MSAA */
- mt->msaa_layout = compute_msaa_layout(brw, format, mt->aux_disable);
+ mt->msaa_layout = compute_msaa_layout(brw, format, layout_flags);
if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
/* From the Ivybridge PRM, Volume 1, Part 1, page 108:
* "If the surface is multisampled and it is a depth or stencil
@@ -460,8 +476,7 @@ intel_miptree_create_layout(struct brw_context *brw,
if (!(layout_flags & MIPTREE_LAYOUT_FOR_BO) &&
_mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
(brw->must_use_separate_stencil ||
- (brw->has_separate_stencil &&
- intel_miptree_wants_hiz_buffer(brw, mt)))) {
+ (brw->has_separate_stencil && intel_miptree_supports_hiz(brw, mt)))) {
uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
if (brw->gen == 6) {
stencil_flags |= MIPTREE_LAYOUT_TILING_ANY;
@@ -530,14 +545,44 @@ intel_miptree_create_layout(struct brw_context *brw,
return NULL;
}
- if (mt->aux_disable & INTEL_AUX_DISABLE_MCS)
- assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS);
-
return mt;
}
/**
+ * Choose the aux usage for this miptree. This function must be called fairly
+ * late in the miptree create process after we have a tiling.
+ */
+static void
+intel_miptree_choose_aux_usage(struct brw_context *brw,
+ struct intel_mipmap_tree *mt)
+{
+ assert(mt->aux_usage == ISL_AUX_USAGE_NONE);
+
+ if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
+ mt->aux_usage = ISL_AUX_USAGE_MCS;
+ } else if (intel_tiling_supports_ccs(brw, mt->tiling) &&
+ intel_miptree_supports_ccs(brw, mt)) {
+ if (!unlikely(INTEL_DEBUG & DEBUG_NO_RBC) &&
+ brw->gen >= 9 && !mt->is_scanout &&
+ intel_miptree_supports_ccs_e(brw, mt)) {
+ mt->aux_usage = ISL_AUX_USAGE_CCS_E;
+ } else {
+ mt->aux_usage = ISL_AUX_USAGE_CCS_D;
+ }
+ } else if (intel_miptree_supports_hiz(brw, mt)) {
+ mt->aux_usage = ISL_AUX_USAGE_HIZ;
+ }
+
+ /* We can do fast-clear on all auxiliary surface types that are
+ * allocated through the normal texture creation paths.
+ */
+ if (mt->aux_usage != ISL_AUX_USAGE_NONE)
+ mt->supports_fast_clear = true;
+}
+
+
+/**
* Choose an appropriate uncompressed format for a requested
* compressed format, if unsupported.
*/
@@ -670,6 +715,9 @@ miptree_create(struct brw_context *brw,
if (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT)
mt->bo->cache_coherent = false;
+ if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
+ intel_miptree_choose_aux_usage(brw, mt);
+
return mt;
}
@@ -726,29 +774,14 @@ intel_miptree_create(struct brw_context *brw,
}
}
- /* If this miptree is capable of supporting fast color clears, set
- * fast_clear_state appropriately to ensure that fast clears will occur.
- * Allocation of the MCS miptree will be deferred until the first fast
- * clear actually occurs or when compressed single sampled buffer is
- * written by the GPU for the first time.
+ /* Since CCS_E can compress more than just clear color, we create the CCS
+ * for it up-front. For CCS_D which only compresses clears, we create the
+ * CCS on-demand when a clear occurs that wants one.
*/
- if (intel_tiling_supports_ccs(brw, mt->tiling) &&
- intel_miptree_supports_ccs(brw, mt)) {
- mt->aux_disable &= ~INTEL_AUX_DISABLE_CCS;
- assert(brw->gen < 8 || mt->halign == 16 || num_samples <= 1);
-
- /* On Gen9+ clients are not currently capable of consuming compressed
- * single-sampled buffers. Disabling compression allows us to skip
- * resolves.
- */
- const bool lossless_compression_disabled = INTEL_DEBUG & DEBUG_NO_RBC;
- const bool is_lossless_compressed =
- unlikely(!lossless_compression_disabled) &&
- brw->gen >= 9 && !mt->is_scanout &&
- intel_miptree_supports_ccs_e(brw, mt);
-
- if (is_lossless_compressed) {
- intel_miptree_alloc_ccs(brw, mt, is_lossless_compressed);
+ if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
+ if (!intel_miptree_alloc_ccs(brw, mt)) {
+ intel_miptree_release(&mt);
+ return NULL;
}
}
@@ -805,6 +838,21 @@ intel_miptree_create_for_bo(struct brw_context *brw,
mt->offset = offset;
mt->tiling = tiling;
+ if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX)) {
+ intel_miptree_choose_aux_usage(brw, mt);
+
+ /* Since CCS_E can compress more than just clear color, we create the
+ * CCS for it up-front. For CCS_D which only compresses clears, we
+ * create the CCS on-demand when a clear occurs that wants one.
+ */
+ if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
+ if (!intel_miptree_alloc_ccs(brw, mt)) {
+ intel_miptree_release(&mt);
+ return NULL;
+ }
+ }
+ }
+
return mt;
}
@@ -849,16 +897,6 @@ intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
if (!singlesample_mt)
goto fail;
- /* If this miptree is capable of supporting fast color clears, set
- * mcs_state appropriately to ensure that fast clears will occur.
- * Allocation of the MCS miptree will be deferred until the first fast
- * clear actually occurs.
- */
- if (intel_tiling_supports_ccs(intel, singlesample_mt->tiling) &&
- intel_miptree_supports_ccs(intel, singlesample_mt)) {
- singlesample_mt->aux_disable &= ~INTEL_AUX_DISABLE_CCS;
- }
-
if (num_samples == 0) {
intel_miptree_release(&irb->mt);
irb->mt = singlesample_mt;
@@ -913,7 +951,7 @@ intel_miptree_create_for_renderbuffer(struct brw_context *brw,
if (!mt)
goto fail;
- if (intel_miptree_wants_hiz_buffer(brw, mt)) {
+ if (mt->aux_usage == ISL_AUX_USAGE_HIZ) {
ok = intel_miptree_alloc_hiz(brw, mt);
if (!ok)
goto fail;
@@ -1492,7 +1530,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
{
assert(brw->gen >= 7); /* MCS only used on Gen7+ */
assert(mt->mcs_buf == NULL);
- assert((mt->aux_disable & INTEL_AUX_DISABLE_MCS) == 0);
+ assert(mt->aux_usage == ISL_AUX_USAGE_MCS);
/* Choose the correct format for the MCS buffer. All that really matters
* is that we allocate the right buffer size, since we'll always be
@@ -1551,11 +1589,11 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
bool
intel_miptree_alloc_ccs(struct brw_context *brw,
- struct intel_mipmap_tree *mt,
- bool is_ccs_e)
+ struct intel_mipmap_tree *mt)
{
assert(mt->mcs_buf == NULL);
- assert(!(mt->aux_disable & (INTEL_AUX_DISABLE_MCS | INTEL_AUX_DISABLE_CCS)));
+ assert(mt->aux_usage == ISL_AUX_USAGE_CCS_E ||
+ mt->aux_usage == ISL_AUX_USAGE_CCS_D);
struct isl_surf temp_main_surf;
struct isl_surf temp_ccs_surf;
@@ -1590,7 +1628,8 @@ intel_miptree_alloc_ccs(struct brw_context *brw,
* not use the gpu access flag which can cause an unnecessary delay if the
* backing pages happened to be just used by the GPU.
*/
- const uint32_t alloc_flags = is_ccs_e ? 0 : BO_ALLOC_FOR_RENDER;
+ const uint32_t alloc_flags =
+ mt->aux_usage == ISL_AUX_USAGE_CCS_E ? 0 : BO_ALLOC_FOR_RENDER;
buf->bo = brw_bo_alloc_tiled(brw->bufmgr, "ccs-miptree", buf->size,
I915_TILING_Y, buf->pitch, alloc_flags);
@@ -1607,7 +1646,7 @@ intel_miptree_alloc_ccs(struct brw_context *brw,
* used for lossless compression which requires similar initialisation
* as multi-sample compression.
*/
- if (is_ccs_e) {
+ if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
/* Hardware sets the auxiliary buffer to all zeroes when it does full
* resolve. Initialize it accordingly in case the first renderer is
* cpu (or other none compression aware party).
@@ -1868,36 +1907,11 @@ intel_hiz_miptree_buf_create(struct brw_context *brw,
}
bool
-intel_miptree_wants_hiz_buffer(struct brw_context *brw,
- struct intel_mipmap_tree *mt)
-{
- if (!brw->has_hiz)
- return false;
-
- if (mt->hiz_buf != NULL)
- return false;
-
- if (mt->aux_disable & INTEL_AUX_DISABLE_HIZ)
- return false;
-
- switch (mt->format) {
- case MESA_FORMAT_Z_FLOAT32:
- case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
- case MESA_FORMAT_Z24_UNORM_X8_UINT:
- case MESA_FORMAT_Z24_UNORM_S8_UINT:
- case MESA_FORMAT_Z_UNORM16:
- return true;
- default:
- return false;
- }
-}
-
-bool
intel_miptree_alloc_hiz(struct brw_context *brw,
struct intel_mipmap_tree *mt)
{
assert(mt->hiz_buf == NULL);
- assert((mt->aux_disable & INTEL_AUX_DISABLE_HIZ) == 0);
+ assert(mt->aux_usage == ISL_AUX_USAGE_HIZ);
enum isl_aux_state **aux_state =
create_aux_state_map(mt, ISL_AUX_STATE_AUX_INVALID);
@@ -2016,7 +2030,7 @@ intel_miptree_check_color_resolve(const struct brw_context *brw,
unsigned level, unsigned layer)
{
- if ((mt->aux_disable & INTEL_AUX_DISABLE_CCS) || !mt->mcs_buf)
+ if (!mt->mcs_buf)
return;
/* Fast color clear is supported for mipmapped surfaces only on Gen8+. */
@@ -2645,7 +2659,6 @@ intel_miptree_make_shareable(struct brw_context *brw,
0, INTEL_REMAINING_LAYERS, false, false);
if (mt->mcs_buf) {
- mt->aux_disable |= (INTEL_AUX_DISABLE_CCS | INTEL_AUX_DISABLE_MCS);
brw_bo_unreference(mt->mcs_buf->bo);
free(mt->mcs_buf);
mt->mcs_buf = NULL;
@@ -2659,7 +2672,6 @@ intel_miptree_make_shareable(struct brw_context *brw,
}
if (mt->hiz_buf) {
- mt->aux_disable |= INTEL_AUX_DISABLE_HIZ;
intel_miptree_hiz_buffer_free(mt->hiz_buf);
mt->hiz_buf = NULL;
@@ -2674,6 +2686,8 @@ intel_miptree_make_shareable(struct brw_context *brw,
free(mt->aux_state);
mt->aux_state = NULL;
}
+
+ mt->aux_usage = ISL_AUX_USAGE_NONE;
}
@@ -3716,17 +3730,7 @@ intel_miptree_get_aux_isl_surf(struct brw_context *brw,
aux_pitch = mt->mcs_buf->pitch;
aux_qpitch = mt->mcs_buf->qpitch;
- if (mt->num_samples > 1) {
- assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS);
- *usage = ISL_AUX_USAGE_MCS;
- } else if (intel_miptree_is_lossless_compressed(brw, mt)) {
- assert(brw->gen >= 9);
- *usage = ISL_AUX_USAGE_CCS_E;
- } else if ((mt->aux_disable & INTEL_AUX_DISABLE_CCS) == 0) {
- *usage = ISL_AUX_USAGE_CCS_D;
- } else {
- unreachable("Invalid MCS miptree");
- }
+ *usage = mt->aux_usage;
} else if (mt->hiz_buf) {
aux_pitch = mt->hiz_buf->aux_base.pitch;
aux_qpitch = mt->hiz_buf->aux_base.qpitch;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index aa33967..f34be9a 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -287,16 +287,6 @@ enum miptree_array_layout {
GEN6_HIZ_STENCIL,
};
-enum intel_aux_disable {
- INTEL_AUX_DISABLE_NONE = 0,
- INTEL_AUX_DISABLE_HIZ = 1 << 1,
- INTEL_AUX_DISABLE_MCS = 1 << 2,
- INTEL_AUX_DISABLE_CCS = 1 << 3,
- INTEL_AUX_DISABLE_ALL = INTEL_AUX_DISABLE_HIZ |
- INTEL_AUX_DISABLE_MCS |
- INTEL_AUX_DISABLE_CCS
-};
-
/**
* Miptree aux buffer. These buffers are associated with a miptree, but the
* format is managed by the hardware.
@@ -576,6 +566,25 @@ struct intel_mipmap_tree
struct intel_miptree_hiz_buffer *hiz_buf;
/**
+ * \brief The type of auxiliary compression used by this miptree.
+ *
+ * This describes the type of auxiliary compression that is intended to be
+ * used by this miptree. An aux usage of ISL_AUX_USAGE_NONE means that
+ * auxiliary compression is permanently disabled. An aux usage other than
+ * ISL_AUX_USAGE_NONE does not imply that the auxiliary buffer has actually
+ * been allocated nor does it imply that auxiliary compression will always
+ * be enabled for this surface. For instance, with CCS_D, we may allocate
+ * the CCS on-the-fly and it may not be used for texturing if the miptree
+ * is fully resolved.
+ */
+ enum isl_aux_usage aux_usage;
+
+ /**
+ * \brief Whether or not this miptree supports fast clears.
+ */
+ bool supports_fast_clear;
+
+ /**
* \brief Maps miptree slices to their current aux state
*
* This two-dimensional array is indexed as [level][layer] and stores an
@@ -631,13 +640,6 @@ struct intel_mipmap_tree
union isl_color_value fast_clear_color;
/**
- * Disable allocation of auxiliary buffers, such as the HiZ buffer and MCS
- * buffer. This is useful for sharing the miptree bo with an external client
- * that doesn't understand auxiliary buffers.
- */
- enum intel_aux_disable aux_disable;
-
- /**
* Tells if the underlying buffer is to be also consumed by entities other
* than the driver. This allows logic to turn off features such as lossless
* compression which is not currently understood by client applications.
@@ -655,8 +657,7 @@ intel_miptree_is_lossless_compressed(const struct brw_context *brw,
bool
intel_miptree_alloc_ccs(struct brw_context *brw,
- struct intel_mipmap_tree *mt,
- bool is_ccs_e);
+ struct intel_mipmap_tree *mt);
enum {
MIPTREE_LAYOUT_ACCELERATED_UPLOAD = 1 << 0,
@@ -814,10 +815,6 @@ intel_miptree_copy_teximage(struct brw_context *brw,
* functions on a miptree without HiZ. In that case, each function is a no-op.
*/
-bool
-intel_miptree_wants_hiz_buffer(struct brw_context *brw,
- struct intel_mipmap_tree *mt);
-
/**
* \brief Allocate the miptree's embedded HiZ miptree.
* \see intel_mipmap_tree:hiz_mt
--
2.5.0.400.gff86faf
More information about the mesa-dev
mailing list