[Mesa-dev] [PATCH 11/15] i965/msaa: Properly handle sliced layout for Gen7.
Ian Romanick
idr at freedesktop.org
Tue May 22 10:38:29 PDT 2012
On 05/11/2012 11:03 AM, Paul Berry wrote:
> Starting in Gen7, there are two possible layouts for MSAA surfaces:
>
> - Interleaved, in which additional samples are accommodated by scaling
> up the width and height of the surface. This is the only layout
> available in Gen6. On Gen7 it is used for depth and stencil
> surfaces only.
>
> - Sliced, in which the surface is stored as a 2D array, with array
> slice n containing all pixel data for sample n. On Gen7 this layout
> is used for color surfaces.
>
> The "Sliced" layout has an additional requirement: it must be used in
> ARYSPC_LOD0 mode, which means that the surface doesn't leave any extra
> room between array slices for miplevels other than 0.
>
> This patch modifies the surface allocation functions to use the
> correct layout when allocating MSAA surfaces in Gen7, and to set the
> array offsets properly when using ARYSPC_LOD0 mode. It also modifies
> the code that populates SURFACE_STATE structures to ensure that
> ARYSPC_LOD0 mode is selected in the appropriate circumstances.
> ---
> src/mesa/drivers/dri/i965/brw_blorp.cpp | 1 +
> src/mesa/drivers/dri/i965/brw_blorp.h | 5 +
> src/mesa/drivers/dri/i965/brw_tex_layout.c | 10 ++-
> src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 3 +-
> src/mesa/drivers/dri/i965/gen7_blorp.cpp | 2 +
> src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 10 ++
> src/mesa/drivers/dri/intel/intel_mipmap_tree.c | 102 ++++++++++++++++-----
> src/mesa/drivers/dri/intel/intel_mipmap_tree.h | 23 +++++-
> src/mesa/drivers/dri/intel/intel_tex_image.c | 3 +-
> src/mesa/drivers/dri/intel/intel_tex_validate.c | 3 +-
> 10 files changed, 134 insertions(+), 28 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
> index f6aff44..84f92b4 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
> @@ -58,6 +58,7 @@ brw_blorp_surface_info::set(struct intel_mipmap_tree *mt,
> {
> brw_blorp_mip_info::set(mt, level, layer);
> this->num_samples = mt->num_samples;
> + this->array_spacing_lod0 = mt->array_spacing_lod0;
>
> if (mt->format == MESA_FORMAT_S8) {
> /* The miptree is a W-tiled stencil buffer. Surface states can't be set
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
> index 8dabf2c..3e37081 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.h
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
> @@ -99,6 +99,11 @@ public:
> bool map_stencil_as_y_tiled;
>
> unsigned num_samples;
> +
> + /* Setting this flag indicates that the surface should be set up in
> + * ARYSPC_LOD0 mode. Ignored prior to Gen7.
> + */
> + bool array_spacing_lod0;
If this structure is used in some hash key calculations, all of the bool
fields should be grouped together for better packing. If it's not used
in a hash key, it probably doesn't matter.
> };
>
>
> diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
> index 8bf1d3d..f742131 100644
> --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
> +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
> @@ -49,7 +49,10 @@ brw_miptree_layout_texture_array(struct intel_context *intel,
>
> h0 = ALIGN(mt->height0, mt->align_h);
> h1 = ALIGN(minify(mt->height0), mt->align_h);
> - qpitch = (h0 + h1 + (intel->gen>= 7 ? 12 : 11) * mt->align_h);
> + if (mt->array_spacing_lod0)
> + qpitch = h0;
> + else
> + qpitch = (h0 + h1 + (intel->gen>= 7 ? 12 : 11) * mt->align_h);
> if (mt->compressed)
> qpitch /= 4;
>
> @@ -165,7 +168,10 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt)
> break;
>
> default:
> - i945_miptree_layout_2d(mt);
> + if (mt->num_samples> 0&& !mt->msaa_is_interleaved)
> + brw_miptree_layout_texture_array(intel, mt);
> + else
> + i945_miptree_layout_2d(mt);
> break;
> }
> DBG("%s: %dx%dx%d\n", __FUNCTION__,
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index 849da85..6e745cf 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -955,7 +955,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
> intel_image->base.Base.Level,
> width, height, depth,
> true,
> - 0 /* num_samples */);
> + 0 /* num_samples */,
> + false /* msaa_is_interleaved */);
>
> intel_miptree_copy_teximage(intel, intel_image, new_mt);
> intel_miptree_reference(&irb->mt, intel_image->mt);
> diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> index 3775eec..dfc2272 100644
> --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
> @@ -149,6 +149,8 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
>
> surf->ss0.surface_format = format;
> surf->ss0.surface_type = BRW_SURFACE_2D;
> + surf->ss0.surface_array_spacing = surface->array_spacing_lod0 ?
> + GEN7_SURFACE_ARYSPC_LOD0 : GEN7_SURFACE_ARYSPC_FULL;
>
> /* reloc */
> surf->ss1.base_addr = region->bo->offset; /* No tile offsets needed */
> diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> index 5aa62bd..9a01ddf 100644
> --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> @@ -142,6 +142,10 @@ gen7_update_texture_surface(struct gl_context *ctx, GLuint unit)
> return;
> }
>
> + /* We don't support MSAA for textures. */
> + assert(!mt->array_spacing_lod0);
> + assert(mt->num_samples == 0);
> +
> intel_miptree_get_dimensions_for_image(firstImage,&width,&height,&depth);
>
> surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
> @@ -296,6 +300,9 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
> sizeof(*surf), 32,&brw->wm.surf_offset[unit]);
> memset(surf, 0, sizeof(*surf));
>
> + /* Render targets can't use MSAA interleaved layout */
> + assert(!irb->mt->msaa_is_interleaved);
> +
> if (irb->mt->align_h == 4)
> surf->ss0.vertical_alignment = 1;
> if (irb->mt->align_w == 8)
> @@ -324,6 +331,9 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
> }
>
> surf->ss0.surface_type = BRW_SURFACE_2D;
> + surf->ss0.surface_array_spacing = irb->mt->array_spacing_lod0 ?
> + GEN7_SURFACE_ARYSPC_LOD0 : GEN7_SURFACE_ARYSPC_FULL;
> +
> /* reloc */
> surf->ss1.base_addr = intel_renderbuffer_tile_offsets(irb,&tile_x,&tile_y);
> surf->ss1.base_addr += region->bo->offset; /* reloc */
> diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
> index 36d7b77..4132351 100644
> --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
> @@ -73,7 +73,8 @@ intel_miptree_create_internal(struct intel_context *intel,
> GLuint height0,
> GLuint depth0,
> bool for_region,
> - GLuint num_samples)
> + GLuint num_samples,
> + bool msaa_is_interleaved)
> {
> struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
> int compress_byte = 0;
> @@ -95,8 +96,14 @@ intel_miptree_create_internal(struct intel_context *intel,
> mt->cpp = compress_byte ? compress_byte : _mesa_get_format_bytes(mt->format);
> mt->num_samples = num_samples;
> mt->compressed = compress_byte ? 1 : 0;
> + mt->msaa_is_interleaved = msaa_is_interleaved;
> mt->refcount = 1;
>
> + /* array_spacing_lod0 is only used for non-interleaved MSAA surfaces.
> + * TODO: can we use it elsewhere?
> + */
> + mt->array_spacing_lod0 = num_samples> 0&& !msaa_is_interleaved;
> +
> if (target == GL_TEXTURE_CUBE_MAP) {
> assert(depth0 == 1);
> mt->depth0 = 6;
> @@ -109,6 +116,8 @@ intel_miptree_create_internal(struct intel_context *intel,
> (intel->must_use_separate_stencil ||
> (intel->has_separate_stencil&&
> intel->vtbl.is_hiz_depth_format(intel, format)))) {
> + /* MSAA stencil surfaces are always interleaved. */
> + bool msaa_is_interleaved = num_samples> 0;
> mt->stencil_mt = intel_miptree_create(intel,
> mt->target,
> MESA_FORMAT_S8,
> @@ -118,7 +127,8 @@ intel_miptree_create_internal(struct intel_context *intel,
> mt->height0,
> mt->depth0,
> true,
> - num_samples);
> + num_samples,
> + msaa_is_interleaved);
> if (!mt->stencil_mt) {
> intel_miptree_release(&mt);
> return NULL;
> @@ -165,7 +175,8 @@ intel_miptree_create(struct intel_context *intel,
> GLuint height0,
> GLuint depth0,
> bool expect_accelerated_upload,
> - GLuint num_samples)
> + GLuint num_samples,
> + bool msaa_is_interleaved)
> {
> struct intel_mipmap_tree *mt;
> uint32_t tiling = I915_TILING_NONE;
> @@ -207,7 +218,7 @@ intel_miptree_create(struct intel_context *intel,
> mt = intel_miptree_create_internal(intel, target, format,
> first_level, last_level, width0,
> height0, depth0,
> - false, num_samples);
> + false, num_samples, msaa_is_interleaved);
> /*
> * pitch == 0 || height == 0 indicates the null texture
> */
> @@ -243,7 +254,8 @@ intel_miptree_create_for_region(struct intel_context *intel,
> mt = intel_miptree_create_internal(intel, target, format,
> 0, 0,
> region->width, region->height, 1,
> - true, 0 /* num_samples */);
> + true, 0 /* num_samples */,
> + false /* msaa_is_interleaved */);
> if (!mt)
> return mt;
>
> @@ -252,6 +264,31 @@ intel_miptree_create_for_region(struct intel_context *intel,
> return mt;
> }
>
> +/**
> + * Determine whether the MSAA surface being created should use an interleaved
> + * layout or a sliced layout, based on the chip generation and the surface
> + * type.
> + */
> +static bool
> +msaa_format_is_interleaved(struct intel_context *intel, gl_format format)
> +{
> + /* Prior to Gen7, all surfaces used interleaved layout. */
> + if (intel->gen< 7)
> + return true;
> +
> + /* In Gen7, interleaved layout is only used for depth and stencil
> + * buffers.
> + */
> + switch (_mesa_get_format_base_format(format)) {
> + case GL_DEPTH_COMPONENT:
> + case GL_STENCIL_INDEX:
> + case GL_DEPTH_STENCIL:
> + return true;
> + default:
> + return false;
> + }
> +}
> +
> struct intel_mipmap_tree*
> intel_miptree_create_for_renderbuffer(struct intel_context *intel,
> gl_format format,
> @@ -260,25 +297,43 @@ intel_miptree_create_for_renderbuffer(struct intel_context *intel,
> uint32_t num_samples)
> {
> struct intel_mipmap_tree *mt;
> -
> - /* Adjust width/height for MSAA. Note: since samples are interleaved in a
> - * complex pattern that repeats for every 2x2 block of pixels, we need to
> - * expand the width and height to even numbers before the width/height
> - * adjustment, otherwise some of the samples in the last row and column
> - * will fall outside the boundary of the texture.
> - */
> - if (num_samples> 4) {
> - num_samples = 8;
> - width = ALIGN(width, 2) * 4;
> - height = ALIGN(height, 2) * 2;
> - } else if (num_samples> 0) {
> - num_samples = 4;
> - width = ALIGN(width, 2) * 2;
> - height = ALIGN(height, 2) * 2;
> + uint32_t depth = 1;
> + bool msaa_is_interleaved = false;
> +
> + if (num_samples> 0) {
> + /* Adjust width/height/depth for MSAA */
> + msaa_is_interleaved = msaa_format_is_interleaved(intel, format);
> + if (msaa_is_interleaved) {
> + /* Note: since samples are interleaved in a complex pattern that
> + * repeats for every 2x2 block of pixels, we need to expand the width
> + * and height to even numbers before the width/height adjustment,
> + * otherwise some of the samples in the last row and column will fall
> + * outside the boundary of the texture.
> + */
> + switch (num_samples) {
> + case 4:
> + width = ALIGN(width, 2) * 2;
> + height = ALIGN(height, 2) * 2;
> + break;
> + case 8:
> + width = ALIGN(width, 2) * 4;
> + height = ALIGN(height, 2) * 2;
> + break;
It seems like this would be better as
width = ALIGN(width, 2) * num_samples / 2;
height = ALIGN(height, 2) * 2;
> + default:
> + /* num_samples should already have been quantized to 0, 4, or
> + * 8.
> + */
> + assert(false);
> + }
> + } else {
> + /* Non-interleaved */
> + depth = num_samples;
> + }
> }
>
> mt = intel_miptree_create(intel, GL_TEXTURE_2D, format, 0, 0,
> - width, height, 1, true, num_samples);
> + width, height, depth, true, num_samples,
> + msaa_is_interleaved);
>
> return mt;
> }
> @@ -552,6 +607,8 @@ intel_miptree_alloc_hiz(struct intel_context *intel,
> GLuint num_samples)
> {
> assert(mt->hiz_mt == NULL);
> + /* MSAA HiZ surfaces are always interleaved. */
> + bool msaa_is_interleaved = num_samples> 0;
> mt->hiz_mt = intel_miptree_create(intel,
> mt->target,
> MESA_FORMAT_X8_Z24,
> @@ -561,7 +618,8 @@ intel_miptree_alloc_hiz(struct intel_context *intel,
> mt->height0,
> mt->depth0,
> true,
> - num_samples);
> + num_samples,
> + msaa_is_interleaved);
>
> if (!mt->hiz_mt)
> return false;
> diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
> index ca1666d..3883d2b 100644
> --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
> +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
> @@ -172,6 +172,26 @@ struct intel_mipmap_tree
> GLuint num_samples;
> bool compressed;
>
> + /**
> + * For 1D array, 2D array, cube, and 2D multisampled surfaces on Gen7: true
> + * if the surface only contains LOD 0, and hence no space is for LOD's
> + * other than 0 in between array slices.
> + *
> + * Corresponds to the surface_array_spacing bit in gen7_surface_state.
> + */
> + bool array_spacing_lod0;
> +
> + /**
> + * For MSAA buffers, there are two possible layouts:
> + * - Interleaved, in which the additional samples are accommodated
> + * by scaling up the width and height of the surface.
> + * - Sliced, in which the surface is stored as a 2D array, with
> + * array slice n containing all pixel data for sample n.
> + *
> + * This value is true if num_samples> 0 and the format is interleaved.
> + */
> + bool msaa_is_interleaved;
> +
> /* Derived from the above:
> */
> GLuint total_width;
> @@ -233,7 +253,8 @@ struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel,
> GLuint height0,
> GLuint depth0,
> bool expect_accelerated_upload,
> - GLuint num_samples);
> + GLuint num_samples,
> + bool msaa_is_interleaved);
>
> struct intel_mipmap_tree *
> intel_miptree_create_for_region(struct intel_context *intel,
> diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c
> index bb2217f..f5d075b 100644
> --- a/src/mesa/drivers/dri/intel/intel_tex_image.c
> +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
> @@ -100,7 +100,8 @@ intel_miptree_create_for_teximage(struct intel_context *intel,
> height,
> depth,
> expect_accelerated_upload,
> - 0 /* num_samples */);
> + 0 /* num_samples */,
> + false /* msaa_is_interleaved */);
> }
>
> /* There are actually quite a few combinations this will work for,
> diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c
> index cadba29..c2d7d67 100644
> --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
> +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
> @@ -87,7 +87,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
> height,
> depth,
> true,
> - 0 /* num_samples */);
> + 0 /* num_samples */,
> + false /* msaa_is_interleaved */);
> if (!intelObj->mt)
> return false;
> }
More information about the mesa-dev
mailing list