[Mesa-dev] [PATCH] i965/gen7: workaround alpha blending problems with CMS MSAA buffers.
Paul Berry
stereotype441 at gmail.com
Mon Nov 4 14:24:17 PST 2013
i965/gen7 hardware doesn't perform alpha blending correctly when
compressed (CMS) multisampled buffers are in use. Therefore, we need
to detect when alpha blending is used on a compressed multisampled
buffer, and convert the buffer to uncompressed (UMS) layout.
Once a given buffer has been converted from CMS to UMS, we leave it in
UMS mode forever after, because (a) the conversion is expensive, and
(b) it seems likely that if alpha blending is performed on a buffer
once, it will probably be performed again on subsequent frames.
Fixes glitches in GTA San Andreas under Wine.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=53077
---
src/mesa/drivers/dri/i965/brw_blorp.h | 14 ++++
src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 51 +++++++++++++
src/mesa/drivers/dri/i965/brw_context.h | 1 +
src/mesa/drivers/dri/i965/brw_draw.c | 5 +-
src/mesa/drivers/dri/i965/brw_misc_state.c | 104 +++++++++++++++++++++++++++
src/mesa/drivers/dri/i965/gen6_cc.c | 2 +
src/mesa/drivers/dri/i965/gen7_blorp.cpp | 2 +-
7 files changed, 176 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
index 07ab805..bbc610d 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -54,6 +54,9 @@ void
brw_blorp_resolve_color(struct brw_context *brw,
struct intel_mipmap_tree *mt);
+void
+brw_blorp_cms_to_ums(struct brw_context *brw, struct intel_mipmap_tree *mt);
+
#ifdef __cplusplus
} /* end extern "C" */
@@ -356,6 +359,17 @@ public:
virtual uint32_t get_wm_prog(struct brw_context *brw,
brw_blorp_prog_data **prog_data) const;
+ /**
+ * Force the destination to use uncompressed multisampling
+ * (INTEL_MSAA_LAYOUT_UMS) instead of compressed multisampling. This is
+ * used when we are converting a framebuffer from an uncompressed to a
+ * compressed layout.
+ */
+ void force_dst_to_ums()
+ {
+ this->dst.msaa_layout = INTEL_MSAA_LAYOUT_UMS;
+ }
+
private:
brw_blorp_blit_prog_key wm_prog_key;
};
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 7e436f7..6d558d8 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -167,6 +167,57 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level, dst_layer);
}
+
+/**
+ * Convert a multisampled framebuffer from compressed layout
+ * (INTEL_MSAA_LAYOUT_CMS) to uncompressed layout (INTEL_MSAA_LAYOUT_UMS).
+ *
+ * The conversion happens in place--no extra buffer needs to be allocated.
+ */
+void
+brw_blorp_cms_to_ums(struct brw_context *brw, struct intel_mipmap_tree *mt)
+{
+ assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS);
+
+ /* Multisampled buffers aren't mipmapped, so the code below only has to fix
+ * up miplevel 0.
+ */
+ assert(mt->first_level == 0 && mt->last_level == 0);
+
+ /* Cubemaps can't be multisampled, so the code below doesn't have to
+ * correct by a factor of 6 when counting layers.
+ */
+ assert(mt->target != GL_TEXTURE_CUBE_MAP);
+
+ for (unsigned layer = 0; layer < mt->logical_depth0; layer++) {
+ /* Note: normally blitting from one miptree to itself is undefined if
+ * the source and destination rectangles overlap, because (a) the 3D
+ * pipeline doesn't make any guarantee as to what order the pixels are
+ * processed in, and (b) the source and destination rectangles are
+ * accessed through different (non-coherent) caches. However, in this
+ * case it's ok, since the source and destination rectangles are
+ * identical, so (a) order doesn't matter because every pixel is copied
+ * to itself, and (b) cache coherency isn't a problem because every
+ * pixel is read exactly once and then written exactly once.
+ */
+ brw_blorp_blit_params params(brw,
+ mt, 0, layer,
+ mt, 0, layer,
+ 0, 0,
+ mt->logical_width0, mt->logical_height0,
+ 0, 0,
+ mt->logical_width0, mt->logical_height0,
+ GL_NEAREST, false, false);
+ params.force_dst_to_ums();
+ brw_blorp_exec(brw, ¶ms);
+ }
+
+ intel_miptree_release(&mt->mcs_mt);
+ mt->msaa_layout = INTEL_MSAA_LAYOUT_UMS;
+ mt->mcs_state = INTEL_MCS_STATE_NONE;
+}
+
+
static void
do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit,
struct intel_renderbuffer *src_irb,
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 7aacf4f..85cb567 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1466,6 +1466,7 @@ void brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
uint32_t *out_tile_mask_y);
void brw_workaround_depthstencil_alignment(struct brw_context *brw,
GLbitfield clear_mask);
+void brw_workaround_cms_blending(struct brw_context *brw);
/* brw_object_purgeable.c */
void brw_init_object_purgeable_functions(struct dd_function_table *functions);
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 0acd089..50c0ddc 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -355,10 +355,11 @@ static bool brw_try_draw_prims( struct gl_context *ctx,
intel_prepare_render(brw);
- /* This workaround has to happen outside of brw_upload_state() because it
- * may flush the batchbuffer for a blit, affecting the state flags.
+ /* These workarounds have to happen outside of brw_upload_state() because
+ * they may flush the batchbuffer for a blit, affecting the state flags.
*/
brw_workaround_depthstencil_alignment(brw, 0);
+ brw_workaround_cms_blending(brw);
/* Resolves must occur after updating renderbuffers, updating context state,
* and finalizing textures but before setting up any hardware state for
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 70b0dbd..95836fa 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -36,6 +36,7 @@
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
+#include "brw_blorp.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
@@ -1081,3 +1082,106 @@ const struct brw_tracked_state brw_state_base_address = {
},
.emit = upload_state_base_address
};
+
+
+/**
+ * Return true if the color buffer indicated by \c b needs the CMS blending
+ * workaround applied to it.
+ *
+ * When the CMS MSAA layout is in use, the hardware's pixel backend always
+ * seems to write new color values into the buffer correctly, but it doesn't
+ * always read old values from the buffer correctly. Therefore, whenever the
+ * blending/logic-op mode causes the final value of covered samples to depend
+ * on the old values of those samples, we need to do a workaround.
+ */
+static inline bool
+cms_blending_workaround_needed(struct brw_context *brw, int b)
+{
+ struct gl_context *ctx = &brw->ctx;
+ struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[b];
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+ if (irb->mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS)
+ return false;
+
+ /* _NEW_COLOR */
+ if (ctx->Color.ColorLogicOpEnabled) {
+ switch (ctx->Color.LogicOp) {
+ case GL_CLEAR:
+ case GL_COPY:
+ case GL_COPY_INVERTED:
+ case GL_SET:
+ /* In these modes, the final sample value depends only on the
+ * "source" value from the shader; they don't depend on the
+ * destination sample value.
+ */
+ return false;
+ default:
+ return true;
+ }
+ }
+
+ if (!(ctx->Color.BlendEnabled & (1 << b)))
+ return false;
+
+ GLenum eqRGB = ctx->Color.Blend[b].EquationRGB;
+ if (eqRGB == GL_MIN || eqRGB == GL_MAX)
+ return true;
+ switch (ctx->Color.Blend[b].DstRGB) {
+ case GL_ZERO:
+ break;
+ case GL_ONE_MINUS_DST_ALPHA:
+ /* If the buffer is missing an alpha channel, alpha is effectively
+ * always 1.0, so GL_ONE_MINUS_DST_ALPHA mode reduces to GL_ZERO mode.
+ */
+ return _mesa_base_format_has_channel(rb->_BaseFormat,
+ GL_TEXTURE_ALPHA_TYPE);
+ default:
+ return true;
+ }
+
+ if (_mesa_base_format_has_channel(rb->_BaseFormat, GL_TEXTURE_ALPHA_TYPE)) {
+ GLenum eqA = ctx->Color.Blend[b].EquationA;
+ if (eqA == GL_MIN || eqA == GL_MAX)
+ return true;
+ if (ctx->Color.Blend[b].DstA != GL_ZERO)
+ return true;
+ }
+
+ return false;
+}
+
+
+/**
+ * When the CMS MSAA layout is in use, the hardware's pixel backend always
+ * seems to write new color values into the buffer correctly, but it doesn't
+ * always read old values from the buffer correctly.
+ *
+ * Therefore, if we're rendering in a way that causes the pixel backend to
+ * read color values from the buffer, we need to first convert from the CMS to
+ * the UMS layout.
+ */
+void
+brw_workaround_cms_blending(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+
+ if (!(brw->NewGLState & (_NEW_BUFFERS | _NEW_COLOR)))
+ return;
+
+ /* Prior to Gen7, compressed multisampling wasn't supported. Hopefully
+ * this bug is fixed in Gen8.
+ */
+ if (brw->gen != 7)
+ return;
+
+ /* _NEW_BUFFERS */
+ int nr_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers;
+ for (int b = 0; b < nr_draw_buffers; b++) {
+ if (!cms_blending_workaround_needed(brw, b))
+ continue;
+
+ struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[b];
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+ brw_blorp_cms_to_ums(brw, irb->mt);
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c
index 45c926c..f1222a3 100644
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -30,11 +30,13 @@
#include "brw_defines.h"
#include "brw_util.h"
#include "intel_batchbuffer.h"
+#include "intel_fbo.h"
#include "main/macros.h"
#include "main/enums.h"
#include "main/glformats.h"
#include "main/stencil.h"
+
static void
gen6_upload_blend_state(struct brw_context *brw)
{
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index 71f31b7..d66e5cc 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -196,7 +196,7 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
surf[3] = pitch_bytes - 1;
surf[4] = gen7_surface_msaa_bits(surface->num_samples, surface->msaa_layout);
- if (surface->mt->mcs_mt) {
+ if (surface->mt->mcs_mt && surface->msaa_layout != INTEL_MSAA_LAYOUT_UMS) {
gen7_set_surface_mcs_info(brw, surf, wm_surf_offset, surface->mt->mcs_mt,
is_render_target);
}
--
1.8.4.2
More information about the mesa-dev
mailing list