[Mesa-dev] [PATCH] swr: MSAA fixes: piglit crashes, additional formats, improve perf.

Bruce Cherniak bruce.cherniak at intel.com
Wed Apr 26 13:24:36 UTC 2017


This patch moves msaa resolve down into core/StoreTiles where it can
take advantage of all the surface formats - previous resolve
was limited to 8-bit unsigned.  This fixes a number of piglit msaa
tests that were crashing.  MSAA performance is also greatly improved
because resolve is done in parallel.

MSAA is still disabled by default, but can be enabled with
"export SWR_MSAA_MAX_COUNT=4" (1,2,4,8,16 are options)
The default is 0, which is disabled.

Because it fixes piglit crashes, this should be included in 17.1 stable.

cc: mesa-stable at lists.freedesktop.org

---
 .../drivers/swr/rasterizer/memory/StoreTile.h      | 75 +++++++++++++++++++++
 src/gallium/drivers/swr/swr_context.cpp            | 77 +---------------------
 src/gallium/drivers/swr/swr_screen.cpp             | 10 +--
 3 files changed, 82 insertions(+), 80 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
index ffde574c03..12a5f3d8ce 100644
--- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
+++ b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
@@ -1133,6 +1133,64 @@ struct StoreRasterTile
             }
         }
     }
+
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief Resolves an 8x8 raster tile to the resolve destination surface.
+    /// @param pSrc - Pointer to raster tile.
+    /// @param pDstSurface - Destination surface state
+    /// @param x, y - Coordinates to raster tile.
+    /// @param sampleOffset - Offset between adjacent multisamples
+    INLINE static void Resolve(
+        uint8_t *pSrc,
+        SWR_SURFACE_STATE* pDstSurface,
+        uint32_t x, uint32_t y, uint32_t sampleOffset, uint32_t renderTargetArrayIndex) // (x, y) pixel coordinate to start of raster tile.
+    {
+        uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
+        uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
+
+        float oneOverNumSamples = 1.0f / pDstSurface->numSamples;
+
+        // For each raster tile pixel (rx, ry)
+        for (uint32_t ry = 0; ry < KNOB_TILE_Y_DIM; ++ry)
+        {
+            for (uint32_t rx = 0; rx < KNOB_TILE_X_DIM; ++rx)
+            {
+                // Perform bounds checking.
+                if (((x + rx) < lodWidth) &&
+                        ((y + ry) < lodHeight))
+                {
+                    // Sum across samples
+                    float resolveColor[4] = {0};
+                    for (uint32_t sampleNum = 0; sampleNum < pDstSurface->numSamples; sampleNum++)
+                    {
+                        float sampleColor[4] = {0};
+                        uint8_t *pSampleSrc = pSrc + sampleOffset * sampleNum;
+                        GetSwizzledSrcColor(pSampleSrc, rx, ry, sampleColor);
+                        resolveColor[0] += sampleColor[0];
+                        resolveColor[1] += sampleColor[1];
+                        resolveColor[2] += sampleColor[2];
+                        resolveColor[3] += sampleColor[3];
+                    }
+
+                    // Divide by numSamples to average
+                    resolveColor[0] *= oneOverNumSamples;
+                    resolveColor[1] *= oneOverNumSamples;
+                    resolveColor[2] *= oneOverNumSamples;
+                    resolveColor[3] *= oneOverNumSamples;
+
+                    // Use the resolve surface state
+                    SWR_SURFACE_STATE* pResolveSurface = (SWR_SURFACE_STATE*)pDstSurface->pAuxBaseAddress;
+                    uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>((x + rx), (y + ry),
+                        pResolveSurface->arrayIndex + renderTargetArrayIndex, pResolveSurface->arrayIndex + renderTargetArrayIndex,
+                        0, pResolveSurface->lod, pResolveSurface);
+                    {
+                        ConvertPixelFromFloat<DstFormat>(pDst, resolveColor);
+                    }
+                }
+            }
+        }
+    }
+
 };
 
 template<typename TTraits, SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
@@ -2316,6 +2374,9 @@ struct StoreMacroTile
             pfnStore[sampleNum] = (bForceGeneric || KNOB_USE_GENERIC_STORETILE) ? StoreRasterTile<TTraits, SrcFormat, DstFormat>::Store : OptStoreRasterTile<TTraits, SrcFormat, DstFormat>::Store;
         }
 
+        // Save original for pSrcHotTile resolve.
+        uint8_t *pResolveSrcHotTile = pSrcHotTile;
+
         // Store each raster tile from the hot tile to the destination surface.
         for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
         {
@@ -2328,6 +2389,20 @@ struct StoreMacroTile
                 }
             }
         }
+
+        if (pDstSurface->pAuxBaseAddress)
+        {
+            uint32_t sampleOffset = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<SrcFormat>::bpp / 8);
+            // Store each raster tile from the hot tile to the destination surface.
+            for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
+            {
+                for(uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
+                {
+                    StoreRasterTile<TTraits, SrcFormat, DstFormat>::Resolve(pResolveSrcHotTile, pDstSurface, (x + col), (y + row), sampleOffset, renderTargetArrayIndex);
+                    pResolveSrcHotTile += sampleOffset * pDstSurface->numSamples;
+                }
+            }
+        }
     }
 };
 
diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp
index aa5cca8e65..4b7a321e51 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -267,65 +267,6 @@ swr_resource_copy(struct pipe_context *pipe,
 }
 
 
-/* XXX: This resolve is incomplete and suboptimal. It will be removed once the
- * pipelined resolve blit works. */
-void
-swr_do_msaa_resolve(struct pipe_resource *src_resource,
-                    struct pipe_resource *dst_resource)
-{
-   /* This is a pretty dumb inline resolve.  It only supports 8-bit formats
-    * (ex RGBA8/BGRA8) - which are most common display formats anyway.
-    */
-
-   /* quick check for 8-bit and number of components */
-   uint8_t bits_per_component =
-      util_format_get_component_bits(src_resource->format,
-            UTIL_FORMAT_COLORSPACE_RGB, 0);
-
-   /* Unsupported resolve format */
-   assert(src_resource->format == dst_resource->format);
-   assert(bits_per_component == 8);
-   if ((src_resource->format != dst_resource->format) ||
-       (bits_per_component != 8)) {
-      return;
-   }
-
-   uint8_t src_num_comps = util_format_get_nr_components(src_resource->format);
-
-   SWR_SURFACE_STATE *src_surface = &swr_resource(src_resource)->swr;
-   SWR_SURFACE_STATE *dst_surface = &swr_resource(dst_resource)->swr;
-
-   uint32_t *src, *dst, offset;
-   uint32_t num_samples = src_surface->numSamples;
-   float recip_num_samples = 1.0f / num_samples;
-   for (uint32_t y = 0; y < src_surface->height; y++) {
-      for (uint32_t x = 0; x < src_surface->width; x++) {
-         float r = 0.0f;
-         float g = 0.0f;
-         float b = 0.0f;
-         float a = 0.0f;
-         for (uint32_t sampleNum = 0;  sampleNum < num_samples; sampleNum++) {
-            offset = ComputeSurfaceOffset<false>(x, y, 0, 0, sampleNum, 0, src_surface);
-            src = (uint32_t *) src_surface->pBaseAddress + offset/src_num_comps;
-            const uint32_t sample = *src;
-            r += (float)((sample >> 24) & 0xff) / 255.0f * recip_num_samples;
-            g += (float)((sample >> 16) & 0xff) / 255.0f * recip_num_samples;
-            b += (float)((sample >>  8) & 0xff) / 255.0f * recip_num_samples;
-            a += (float)((sample      ) & 0xff) / 255.0f * recip_num_samples;
-         }
-         uint32_t result = 0;
-         result  = ((uint8_t)(r * 255.0f) & 0xff) << 24;
-         result |= ((uint8_t)(g * 255.0f) & 0xff) << 16;
-         result |= ((uint8_t)(b * 255.0f) & 0xff) <<  8;
-         result |= ((uint8_t)(a * 255.0f) & 0xff);
-         offset = ComputeSurfaceOffset<false>(x, y, 0, 0, 0, 0, src_surface);
-         dst = (uint32_t *) dst_surface->pBaseAddress + offset/src_num_comps;
-         *dst = result;
-      }
-   }
-}
-
-
 static void
 swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
 {
@@ -342,28 +283,14 @@ swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
       debug_printf("swr_blit: color resolve : %d -> %d\n",
             info.src.resource->nr_samples, info.dst.resource->nr_samples);
 
-      /* Because the resolve is being done inline (not pipelined),
-       * resources need to be stored out of hottiles and the pipeline empty.
-       *
-       * Resources are marked unused following fence finish because all
-       * pipeline operations are complete.  Validation of the blit will mark
-       * them are read/write again.
-       */
+      /* Resolve is done as part of the surface store. */
       swr_store_dirty_resource(pipe, info.src.resource, SWR_TILE_RESOLVED);
-      swr_store_dirty_resource(pipe, info.dst.resource, SWR_TILE_RESOLVED);
-      swr_fence_finish(pipe->screen, NULL, swr_screen(pipe->screen)->flush_fence, 0);
-      swr_resource_unused(info.src.resource);
-      swr_resource_unused(info.dst.resource);
 
       struct pipe_resource *src_resource = info.src.resource;
       struct pipe_resource *resolve_target =
          swr_resource(src_resource)->resolve_target;
 
-      /* Inline resolve samples into resolve target resource, then continue
-       * the blit. */
-      swr_do_msaa_resolve(src_resource, resolve_target);
-
-      /* The resolve target becomes the new source for the blit.  */
+      /* The resolve target becomes the new source for the blit. */
       info.src.resource = resolve_target;
    }
 
diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
index 04e613745a..5a36d0211a 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -891,6 +891,10 @@ swr_create_resolve_resource(struct pipe_screen *_screen,
 
       /* Attach it to the multisample resource */
       msaa_res->resolve_target = alt;
+
+      /* Hang resolve surface state off the multisample surface state to so
+       * StoreTiles knows where to resolve the surface. */
+      msaa_res->swr.pAuxBaseAddress =  (uint8_t *)&swr_resource(alt)->swr;
    }
 
    return true; /* success */
@@ -1009,14 +1013,10 @@ swr_flush_frontbuffer(struct pipe_screen *p_screen,
       SwrEndFrame(swr_context(pipe)->swrContext);
    }
 
-   /* Multisample surfaces need to be resolved before present */
+   /* Multisample resolved into resolve_target at flush with store_resouce */
    if (pipe && spr->swr.numSamples > 1) {
       struct pipe_resource *resolve_target = spr->resolve_target;
 
-      /* Do an inline surface resolve into the resolve target resource
-       * XXX: This works, just not optimal. Work on using a pipelined blit. */
-      swr_do_msaa_resolve(resource, resolve_target);
-
       /* Once resolved, copy into display target */
       SWR_SURFACE_STATE *resolve = &swr_resource(resolve_target)->swr;
 
-- 
2.11.0



More information about the mesa-dev mailing list