Mesa (master): i965/msaa: Properly handle sliced layout for Gen7.

Paul Berry stereotype441 at kemper.freedesktop.org
Fri May 25 15:45:33 UTC 2012


Module: Mesa
Branch: master
Commit: 455ac562722f60ac9fb0c3d3c697fa339fa011ad
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=455ac562722f60ac9fb0c3d3c697fa339fa011ad

Author: Paul Berry <stereotype441 at gmail.com>
Date:   Tue May  8 13:39:10 2012 -0700

i965/msaa: Properly handle sliced layout for Gen7.

Starting in Gen7, there are two possible layouts for MSAA surfaces:

- Interleaved, in which additional samples are accommodated by scaling
  up the width and height of the surface.  This is the only layout
  available in Gen6.  On Gen7 it is used for depth and stencil
  surfaces only.

- Sliced, in which the surface is stored as a 2D array, with array
  slice n containing all pixel data for sample n.  On Gen7 this layout
  is used for color surfaces.

The "Sliced" layout has an additional requirement: it must be used in
ARYSPC_LOD0 mode, which means that the surface doesn't leave any extra
room between array slices for miplevels other than 0.

This patch modifies the surface allocation functions to use the
correct layout when allocating MSAA surfaces in Gen7, and to set the
array offsets properly when using ARYSPC_LOD0 mode.  It also modifies
the code that populates SURFACE_STATE structures to ensure that
ARYSPC_LOD0 mode is selected in the appropriate circumstances.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

---

 src/mesa/drivers/dri/i965/brw_blorp.cpp           |    1 +
 src/mesa/drivers/dri/i965/brw_blorp.h             |    5 +
 src/mesa/drivers/dri/i965/brw_tex_layout.c        |   10 +-
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c  |    3 +-
 src/mesa/drivers/dri/i965/gen7_blorp.cpp          |    2 +
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c |   10 ++
 src/mesa/drivers/dri/intel/intel_mipmap_tree.c    |  160 ++++++++++++++-------
 src/mesa/drivers/dri/intel/intel_mipmap_tree.h    |   23 +++-
 src/mesa/drivers/dri/intel/intel_tex_image.c      |    3 +-
 src/mesa/drivers/dri/intel/intel_tex_validate.c   |    3 +-
 10 files changed, 162 insertions(+), 58 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
index d80e4f1..09b176b 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -58,6 +58,7 @@ brw_blorp_surface_info::set(struct intel_mipmap_tree *mt,
 {
    brw_blorp_mip_info::set(mt, level, layer);
    this->num_samples = mt->num_samples;
+   this->array_spacing_lod0 = mt->array_spacing_lod0;
 
    if (mt->format == MESA_FORMAT_S8) {
       /* The miptree is a W-tiled stencil buffer.  Surface states can't be set
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
index 6af5f67..f4aafd7 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -85,6 +85,11 @@ public:
    bool map_stencil_as_y_tiled;
 
    unsigned num_samples;
+
+   /* Setting this flag indicates that the surface should be set up in
+    * ARYSPC_LOD0 mode.  Ignored prior to Gen7.
+    */
+   bool array_spacing_lod0;
 };
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 8bf1d3d..f742131 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -49,7 +49,10 @@ brw_miptree_layout_texture_array(struct intel_context *intel,
 
    h0 = ALIGN(mt->height0, mt->align_h);
    h1 = ALIGN(minify(mt->height0), mt->align_h);
-   qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * mt->align_h);
+   if (mt->array_spacing_lod0)
+      qpitch = h0;
+   else
+      qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * mt->align_h);
    if (mt->compressed)
       qpitch /= 4;
 
@@ -165,7 +168,10 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt)
       break;
 
    default:
-      i945_miptree_layout_2d(mt);
+      if (mt->num_samples > 0 && !mt->msaa_is_interleaved)
+         brw_miptree_layout_texture_array(intel, mt);
+      else
+         i945_miptree_layout_2d(mt);
       break;
    }
    DBG("%s: %dx%dx%d\n", __FUNCTION__,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 849da85..6e745cf 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -955,7 +955,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
 				       intel_image->base.Base.Level,
 				       width, height, depth,
 				       true,
-                                       0 /* num_samples */);
+                                       0 /* num_samples */,
+                                       false /* msaa_is_interleaved */);
 
 	 intel_miptree_copy_teximage(intel, intel_image, new_mt);
 	 intel_miptree_reference(&irb->mt, intel_image->mt);
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index 9e1aa4b..58da389 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -169,6 +169,8 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
 
    surf->ss0.surface_format = format;
    surf->ss0.surface_type = BRW_SURFACE_2D;
+   surf->ss0.surface_array_spacing = surface->array_spacing_lod0 ?
+      GEN7_SURFACE_ARYSPC_LOD0 : GEN7_SURFACE_ARYSPC_FULL;
 
    /* reloc */
    surf->ss1.base_addr = region->bo->offset; /* No tile offsets needed */
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index 5aa62bd..9a01ddf 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -142,6 +142,10 @@ gen7_update_texture_surface(struct gl_context *ctx, GLuint unit)
       return;
    }
 
+   /* We don't support MSAA for textures. */
+   assert(!mt->array_spacing_lod0);
+   assert(mt->num_samples == 0);
+
    intel_miptree_get_dimensions_for_image(firstImage, &width, &height, &depth);
 
    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
@@ -296,6 +300,9 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
 			  sizeof(*surf), 32, &brw->wm.surf_offset[unit]);
    memset(surf, 0, sizeof(*surf));
 
+   /* Render targets can't use MSAA interleaved layout */
+   assert(!irb->mt->msaa_is_interleaved);
+
    if (irb->mt->align_h == 4)
       surf->ss0.vertical_alignment = 1;
    if (irb->mt->align_w == 8)
@@ -324,6 +331,9 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
    }
 
    surf->ss0.surface_type = BRW_SURFACE_2D;
+   surf->ss0.surface_array_spacing = irb->mt->array_spacing_lod0 ?
+      GEN7_SURFACE_ARYSPC_LOD0 : GEN7_SURFACE_ARYSPC_FULL;
+
    /* reloc */
    surf->ss1.base_addr = intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y);
    surf->ss1.base_addr += region->bo->offset; /* reloc */
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 13e7396..7018c37 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -73,7 +73,8 @@ intel_miptree_create_internal(struct intel_context *intel,
 			      GLuint height0,
 			      GLuint depth0,
 			      bool for_region,
-                              GLuint num_samples)
+                              GLuint num_samples,
+                              bool msaa_is_interleaved)
 {
    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
    int compress_byte = 0;
@@ -95,8 +96,14 @@ intel_miptree_create_internal(struct intel_context *intel,
    mt->cpp = compress_byte ? compress_byte : _mesa_get_format_bytes(mt->format);
    mt->num_samples = num_samples;
    mt->compressed = compress_byte ? 1 : 0;
+   mt->msaa_is_interleaved = msaa_is_interleaved;
    mt->refcount = 1; 
 
+   /* array_spacing_lod0 is only used for non-interleaved MSAA surfaces.
+    * TODO: can we use it elsewhere?
+    */
+   mt->array_spacing_lod0 = num_samples > 0 && !msaa_is_interleaved;
+
    if (target == GL_TEXTURE_CUBE_MAP) {
       assert(depth0 == 1);
       mt->depth0 = 6;
@@ -109,6 +116,8 @@ intel_miptree_create_internal(struct intel_context *intel,
        (intel->must_use_separate_stencil ||
 	(intel->has_separate_stencil &&
 	 intel->vtbl.is_hiz_depth_format(intel, format)))) {
+      /* MSAA stencil surfaces are always interleaved. */
+      bool msaa_is_interleaved = num_samples > 0;
       mt->stencil_mt = intel_miptree_create(intel,
                                             mt->target,
                                             MESA_FORMAT_S8,
@@ -118,7 +127,8 @@ intel_miptree_create_internal(struct intel_context *intel,
                                             mt->height0,
                                             mt->depth0,
                                             true,
-                                            num_samples);
+                                            num_samples,
+                                            msaa_is_interleaved);
       if (!mt->stencil_mt) {
 	 intel_miptree_release(&mt);
 	 return NULL;
@@ -165,7 +175,8 @@ intel_miptree_create(struct intel_context *intel,
 		     GLuint height0,
 		     GLuint depth0,
 		     bool expect_accelerated_upload,
-                     GLuint num_samples)
+                     GLuint num_samples,
+                     bool msaa_is_interleaved)
 {
    struct intel_mipmap_tree *mt;
    uint32_t tiling = I915_TILING_NONE;
@@ -207,7 +218,7 @@ intel_miptree_create(struct intel_context *intel,
    mt = intel_miptree_create_internal(intel, target, format,
 				      first_level, last_level, width0,
 				      height0, depth0,
-				      false, num_samples);
+				      false, num_samples, msaa_is_interleaved);
    /*
     * pitch == 0 || height == 0  indicates the null texture
     */
@@ -243,7 +254,8 @@ intel_miptree_create_for_region(struct intel_context *intel,
    mt = intel_miptree_create_internal(intel, target, format,
 				      0, 0,
 				      region->width, region->height, 1,
-				      true, 0 /* num_samples */);
+				      true, 0 /* num_samples */,
+                                      false /* msaa_is_interleaved */);
    if (!mt)
       return mt;
 
@@ -252,6 +264,31 @@ intel_miptree_create_for_region(struct intel_context *intel,
    return mt;
 }
 
+/**
+ * Determine whether the MSAA surface being created should use an interleaved
+ * layout or a sliced layout, based on the chip generation and the surface
+ * type.
+ */
+static bool
+msaa_format_is_interleaved(struct intel_context *intel, gl_format format)
+{
+   /* Prior to Gen7, all surfaces used interleaved layout. */
+   if (intel->gen < 7)
+      return true;
+
+   /* In Gen7, interleaved layout is only used for depth and stencil
+    * buffers.
+    */
+   switch (_mesa_get_format_base_format(format)) {
+   case GL_DEPTH_COMPONENT:
+   case GL_STENCIL_INDEX:
+   case GL_DEPTH_STENCIL:
+      return true;
+   default:
+      return false;
+   }
+}
+
 struct intel_mipmap_tree*
 intel_miptree_create_for_renderbuffer(struct intel_context *intel,
                                       gl_format format,
@@ -260,55 +297,71 @@ intel_miptree_create_for_renderbuffer(struct intel_context *intel,
                                       uint32_t num_samples)
 {
    struct intel_mipmap_tree *mt;
-
-   /* Adjust width/height for MSAA.
-    *
-    * In the Sandy Bridge PRM, volume 4, part 1, page 31, it says:
-    *
-    *     "Any of the other messages (sample*, LOD, load4) used with a
-    *      (4x) multisampled surface will in-effect sample a surface with
-    *      double the height and width as that indicated in the surface
-    *      state. Each pixel position on the original-sized surface is
-    *      replaced with a 2x2 of samples with the following arrangement:
-    *
-    *         sample 0 sample 2
-    *         sample 1 sample 3"
-    *
-    * Thus, when sampling from a multisampled texture, it behaves as though
-    * the layout in memory for (x,y,sample) is:
-    *
-    *      (0,0,0) (0,0,2)   (1,0,0) (1,0,2)
-    *      (0,0,1) (0,0,3)   (1,0,1) (1,0,3)
-    *
-    *      (0,1,0) (0,1,2)   (1,1,0) (1,1,2)
-    *      (0,1,1) (0,1,3)   (1,1,1) (1,1,3)
-    *
-    * However, the actual layout of multisampled data in memory is:
-    *
-    *      (0,0,0) (1,0,0)   (0,0,1) (1,0,1)
-    *      (0,1,0) (1,1,0)   (0,1,1) (1,1,1)
-    *
-    *      (0,0,2) (1,0,2)   (0,0,3) (1,0,3)
-    *      (0,1,2) (1,1,2)   (0,1,3) (1,1,3)
-    *
-    * This pattern repeats for each 2x2 pixel block.
-    *
-    * As a result, when calculating the size of our 4-sample buffer for
-    * an odd width or height, we have to align before scaling up because
-    * sample 3 is in that bottom right 2x2 block.
-    */
-   if (num_samples > 4) {
-      num_samples = 8;
-      width = ALIGN(width, 2) * 4;
-      height = ALIGN(height, 2) * 2;
-   } else if (num_samples > 0) {
-      num_samples = 4;
-      width = ALIGN(width, 2) * 2;
-      height = ALIGN(height, 2) * 2;
+   uint32_t depth = 1;
+   bool msaa_is_interleaved = false;
+
+   if (num_samples > 0) {
+      /* Adjust width/height/depth for MSAA */
+      msaa_is_interleaved = msaa_format_is_interleaved(intel, format);
+      if (msaa_is_interleaved) {
+         /* In the Sandy Bridge PRM, volume 4, part 1, page 31, it says:
+          *
+          *     "Any of the other messages (sample*, LOD, load4) used with a
+          *      (4x) multisampled surface will in-effect sample a surface with
+          *      double the height and width as that indicated in the surface
+          *      state. Each pixel position on the original-sized surface is
+          *      replaced with a 2x2 of samples with the following arrangement:
+          *
+          *         sample 0 sample 2
+          *         sample 1 sample 3"
+          *
+          * Thus, when sampling from a multisampled texture, it behaves as
+          * though the layout in memory for (x,y,sample) is:
+          *
+          *      (0,0,0) (0,0,2)   (1,0,0) (1,0,2)
+          *      (0,0,1) (0,0,3)   (1,0,1) (1,0,3)
+          *
+          *      (0,1,0) (0,1,2)   (1,1,0) (1,1,2)
+          *      (0,1,1) (0,1,3)   (1,1,1) (1,1,3)
+          *
+          * However, the actual layout of multisampled data in memory is:
+          *
+          *      (0,0,0) (1,0,0)   (0,0,1) (1,0,1)
+          *      (0,1,0) (1,1,0)   (0,1,1) (1,1,1)
+          *
+          *      (0,0,2) (1,0,2)   (0,0,3) (1,0,3)
+          *      (0,1,2) (1,1,2)   (0,1,3) (1,1,3)
+          *
+          * This pattern repeats for each 2x2 pixel block.
+          *
+          * As a result, when calculating the size of our 4-sample buffer for
+          * an odd width or height, we have to align before scaling up because
+          * sample 3 is in that bottom right 2x2 block.
+          */
+         switch (num_samples) {
+         case 4:
+            width = ALIGN(width, 2) * 2;
+            height = ALIGN(height, 2) * 2;
+            break;
+         case 8:
+            width = ALIGN(width, 2) * 4;
+            height = ALIGN(height, 2) * 2;
+            break;
+         default:
+            /* num_samples should already have been quantized to 0, 4, or
+             * 8.
+             */
+            assert(false);
+         }
+      } else {
+         /* Non-interleaved */
+         depth = num_samples;
+      }
    }
 
    mt = intel_miptree_create(intel, GL_TEXTURE_2D, format, 0, 0,
-			     width, height, 1, true, num_samples);
+			     width, height, depth, true, num_samples,
+                             msaa_is_interleaved);
 
    return mt;
 }
@@ -582,6 +635,8 @@ intel_miptree_alloc_hiz(struct intel_context *intel,
                         GLuint num_samples)
 {
    assert(mt->hiz_mt == NULL);
+   /* MSAA HiZ surfaces are always interleaved. */
+   bool msaa_is_interleaved = num_samples > 0;
    mt->hiz_mt = intel_miptree_create(intel,
                                      mt->target,
                                      MESA_FORMAT_X8_Z24,
@@ -591,7 +646,8 @@ intel_miptree_alloc_hiz(struct intel_context *intel,
                                      mt->height0,
                                      mt->depth0,
                                      true,
-                                     num_samples);
+                                     num_samples,
+                                     msaa_is_interleaved);
 
    if (!mt->hiz_mt)
       return false;
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
index 7536065..5c57e02 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
@@ -172,6 +172,26 @@ struct intel_mipmap_tree
    GLuint num_samples;
    bool compressed;
 
+   /**
+    * For 1D array, 2D array, cube, and 2D multisampled surfaces on Gen7: true
+    * if the surface only contains LOD 0, and hence no space is for LOD's
+    * other than 0 in between array slices.
+    *
+    * Corresponds to the surface_array_spacing bit in gen7_surface_state.
+    */
+   bool array_spacing_lod0;
+
+   /**
+    * For MSAA buffers, there are two possible layouts:
+    * - Interleaved, in which the additional samples are accommodated
+    *   by scaling up the width and height of the surface.
+    * - Sliced, in which the surface is stored as a 2D array, with
+    *   array slice n containing all pixel data for sample n.
+    *
+    * This value is true if num_samples > 0 and the format is interleaved.
+    */
+   bool msaa_is_interleaved;
+
    /* Derived from the above:
     */
    GLuint total_width;
@@ -238,7 +258,8 @@ struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel,
                                                GLuint height0,
                                                GLuint depth0,
 					       bool expect_accelerated_upload,
-                                               GLuint num_samples);
+                                               GLuint num_samples,
+                                               bool msaa_is_interleaved);
 
 struct intel_mipmap_tree *
 intel_miptree_create_for_region(struct intel_context *intel,
diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c
index 68f4ff4..abd75c5 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_image.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@@ -100,7 +100,8 @@ intel_miptree_create_for_teximage(struct intel_context *intel,
 			       height,
 			       depth,
 			       expect_accelerated_upload,
-                               0 /* num_samples */);
+                               0 /* num_samples */,
+                               false /* msaa_is_interleaved */);
 }
 
 /* There are actually quite a few combinations this will work for,
diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c
index cadba29..c2d7d67 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
@@ -87,7 +87,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
                                           height,
                                           depth,
 					  true,
-                                          0 /* num_samples */);
+                                          0 /* num_samples */,
+                                          false /* msaa_is_interleaved */);
       if (!intelObj->mt)
          return false;
    }




More information about the mesa-commit mailing list