[Mesa-dev] [PATCH v2] swr: move msaa resolve to generalized StoreTile

Cherniak, Bruce bruce.cherniak at intel.com
Fri Apr 28 00:45:18 UTC 2017


> On Apr 27, 2017, at 7:38 PM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
> 
> Erm, so ... what happens if I render to FB1, then render to FB2, then
> render to FB1 again (and I have blending enabled)? Doesn't the resolve
> lose the per-sample information? Or does the resolve merely precompute
> the resolved version on the off chance that it's needed, without
> losing the source data?

The resolve occurs into a secondary, driver private, surface.  All per-sample
information is maintained in the original surfaces.

Yes, the resolve is currently done "on the off chance that it’s needed”.
There is likely an optimization to be had there, but it should be functionally
correct.

> On Thu, Apr 27, 2017 at 8:22 PM, Bruce Cherniak
> <bruce.cherniak at intel.com> wrote:
>> v2: Reword commit message to more closely adhere to community
>> guidelines.
>> 
>> This patch moves msaa resolve down into core/StoreTiles where the
>> surface format conversion routines are available.  The previous
>> "experimental" resolve was limited to 8-bit unsigned render targets.
>> 
>> This fixes a number of piglit msaa tests by adding resolve support for
>> all the render target formats we support.
>> 
>> MSAA is still disabled by default, but can be enabled with
>> "export SWR_MSAA_MAX_COUNT=4" (1,2,4,8,16 are options)
>> The default is 0, which is disabled.
>> 
>> Because it fixes a number of piglit tests, I kindly request inclusion
>> into 17.1 stable.
>> 
>> cc: mesa-stable at lists.freedesktop.org
>> ---
>> .../drivers/swr/rasterizer/memory/StoreTile.h      | 75 +++++++++++++++++++++
>> src/gallium/drivers/swr/swr_context.cpp            | 77 +---------------------
>> src/gallium/drivers/swr/swr_screen.cpp             | 10 +--
>> 3 files changed, 82 insertions(+), 80 deletions(-)
>> 
>> diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
>> index ffde574c03..12a5f3d8ce 100644
>> --- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
>> +++ b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
>> @@ -1133,6 +1133,64 @@ struct StoreRasterTile
>>             }
>>         }
>>     }
>> +
>> +    //////////////////////////////////////////////////////////////////////////
>> +    /// @brief Resolves an 8x8 raster tile to the resolve destination surface.
>> +    /// @param pSrc - Pointer to raster tile.
>> +    /// @param pDstSurface - Destination surface state
>> +    /// @param x, y - Coordinates to raster tile.
>> +    /// @param sampleOffset - Offset between adjacent multisamples
>> +    INLINE static void Resolve(
>> +        uint8_t *pSrc,
>> +        SWR_SURFACE_STATE* pDstSurface,
>> +        uint32_t x, uint32_t y, uint32_t sampleOffset, uint32_t renderTargetArrayIndex) // (x, y) pixel coordinate to start of raster tile.
>> +    {
>> +        uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
>> +        uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
>> +
>> +        float oneOverNumSamples = 1.0f / pDstSurface->numSamples;
>> +
>> +        // For each raster tile pixel (rx, ry)
>> +        for (uint32_t ry = 0; ry < KNOB_TILE_Y_DIM; ++ry)
>> +        {
>> +            for (uint32_t rx = 0; rx < KNOB_TILE_X_DIM; ++rx)
>> +            {
>> +                // Perform bounds checking.
>> +                if (((x + rx) < lodWidth) &&
>> +                        ((y + ry) < lodHeight))
>> +                {
>> +                    // Sum across samples
>> +                    float resolveColor[4] = {0};
>> +                    for (uint32_t sampleNum = 0; sampleNum < pDstSurface->numSamples; sampleNum++)
>> +                    {
>> +                        float sampleColor[4] = {0};
>> +                        uint8_t *pSampleSrc = pSrc + sampleOffset * sampleNum;
>> +                        GetSwizzledSrcColor(pSampleSrc, rx, ry, sampleColor);
>> +                        resolveColor[0] += sampleColor[0];
>> +                        resolveColor[1] += sampleColor[1];
>> +                        resolveColor[2] += sampleColor[2];
>> +                        resolveColor[3] += sampleColor[3];
>> +                    }
>> +
>> +                    // Divide by numSamples to average
>> +                    resolveColor[0] *= oneOverNumSamples;
>> +                    resolveColor[1] *= oneOverNumSamples;
>> +                    resolveColor[2] *= oneOverNumSamples;
>> +                    resolveColor[3] *= oneOverNumSamples;
>> +
>> +                    // Use the resolve surface state
>> +                    SWR_SURFACE_STATE* pResolveSurface = (SWR_SURFACE_STATE*)pDstSurface->pAuxBaseAddress;
>> +                    uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>((x + rx), (y + ry),
>> +                        pResolveSurface->arrayIndex + renderTargetArrayIndex, pResolveSurface->arrayIndex + renderTargetArrayIndex,
>> +                        0, pResolveSurface->lod, pResolveSurface);
>> +                    {
>> +                        ConvertPixelFromFloat<DstFormat>(pDst, resolveColor);
>> +                    }
>> +                }
>> +            }
>> +        }
>> +    }
>> +
>> };
>> 
>> template<typename TTraits, SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
>> @@ -2316,6 +2374,9 @@ struct StoreMacroTile
>>             pfnStore[sampleNum] = (bForceGeneric || KNOB_USE_GENERIC_STORETILE) ? StoreRasterTile<TTraits, SrcFormat, DstFormat>::Store : OptStoreRasterTile<TTraits, SrcFormat, DstFormat>::Store;
>>         }
>> 
>> +        // Save original for pSrcHotTile resolve.
>> +        uint8_t *pResolveSrcHotTile = pSrcHotTile;
>> +
>>         // Store each raster tile from the hot tile to the destination surface.
>>         for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
>>         {
>> @@ -2328,6 +2389,20 @@ struct StoreMacroTile
>>                 }
>>             }
>>         }
>> +
>> +        if (pDstSurface->pAuxBaseAddress)
>> +        {
>> +            uint32_t sampleOffset = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<SrcFormat>::bpp / 8);
>> +            // Store each raster tile from the hot tile to the destination surface.
>> +            for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
>> +            {
>> +                for(uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
>> +                {
>> +                    StoreRasterTile<TTraits, SrcFormat, DstFormat>::Resolve(pResolveSrcHotTile, pDstSurface, (x + col), (y + row), sampleOffset, renderTargetArrayIndex);
>> +                    pResolveSrcHotTile += sampleOffset * pDstSurface->numSamples;
>> +                }
>> +            }
>> +        }
>>     }
>> };
>> 
>> diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp
>> index aa5cca8e65..4b7a321e51 100644
>> --- a/src/gallium/drivers/swr/swr_context.cpp
>> +++ b/src/gallium/drivers/swr/swr_context.cpp
>> @@ -267,65 +267,6 @@ swr_resource_copy(struct pipe_context *pipe,
>> }
>> 
>> 
>> -/* XXX: This resolve is incomplete and suboptimal. It will be removed once the
>> - * pipelined resolve blit works. */
>> -void
>> -swr_do_msaa_resolve(struct pipe_resource *src_resource,
>> -                    struct pipe_resource *dst_resource)
>> -{
>> -   /* This is a pretty dumb inline resolve.  It only supports 8-bit formats
>> -    * (ex RGBA8/BGRA8) - which are most common display formats anyway.
>> -    */
>> -
>> -   /* quick check for 8-bit and number of components */
>> -   uint8_t bits_per_component =
>> -      util_format_get_component_bits(src_resource->format,
>> -            UTIL_FORMAT_COLORSPACE_RGB, 0);
>> -
>> -   /* Unsupported resolve format */
>> -   assert(src_resource->format == dst_resource->format);
>> -   assert(bits_per_component == 8);
>> -   if ((src_resource->format != dst_resource->format) ||
>> -       (bits_per_component != 8)) {
>> -      return;
>> -   }
>> -
>> -   uint8_t src_num_comps = util_format_get_nr_components(src_resource->format);
>> -
>> -   SWR_SURFACE_STATE *src_surface = &swr_resource(src_resource)->swr;
>> -   SWR_SURFACE_STATE *dst_surface = &swr_resource(dst_resource)->swr;
>> -
>> -   uint32_t *src, *dst, offset;
>> -   uint32_t num_samples = src_surface->numSamples;
>> -   float recip_num_samples = 1.0f / num_samples;
>> -   for (uint32_t y = 0; y < src_surface->height; y++) {
>> -      for (uint32_t x = 0; x < src_surface->width; x++) {
>> -         float r = 0.0f;
>> -         float g = 0.0f;
>> -         float b = 0.0f;
>> -         float a = 0.0f;
>> -         for (uint32_t sampleNum = 0;  sampleNum < num_samples; sampleNum++) {
>> -            offset = ComputeSurfaceOffset<false>(x, y, 0, 0, sampleNum, 0, src_surface);
>> -            src = (uint32_t *) src_surface->pBaseAddress + offset/src_num_comps;
>> -            const uint32_t sample = *src;
>> -            r += (float)((sample >> 24) & 0xff) / 255.0f * recip_num_samples;
>> -            g += (float)((sample >> 16) & 0xff) / 255.0f * recip_num_samples;
>> -            b += (float)((sample >>  8) & 0xff) / 255.0f * recip_num_samples;
>> -            a += (float)((sample      ) & 0xff) / 255.0f * recip_num_samples;
>> -         }
>> -         uint32_t result = 0;
>> -         result  = ((uint8_t)(r * 255.0f) & 0xff) << 24;
>> -         result |= ((uint8_t)(g * 255.0f) & 0xff) << 16;
>> -         result |= ((uint8_t)(b * 255.0f) & 0xff) <<  8;
>> -         result |= ((uint8_t)(a * 255.0f) & 0xff);
>> -         offset = ComputeSurfaceOffset<false>(x, y, 0, 0, 0, 0, src_surface);
>> -         dst = (uint32_t *) dst_surface->pBaseAddress + offset/src_num_comps;
>> -         *dst = result;
>> -      }
>> -   }
>> -}
>> -
>> -
>> static void
>> swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
>> {
>> @@ -342,28 +283,14 @@ swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
>>       debug_printf("swr_blit: color resolve : %d -> %d\n",
>>             info.src.resource->nr_samples, info.dst.resource->nr_samples);
>> 
>> -      /* Because the resolve is being done inline (not pipelined),
>> -       * resources need to be stored out of hottiles and the pipeline empty.
>> -       *
>> -       * Resources are marked unused following fence finish because all
>> -       * pipeline operations are complete.  Validation of the blit will mark
>> -       * them are read/write again.
>> -       */
>> +      /* Resolve is done as part of the surface store. */
>>       swr_store_dirty_resource(pipe, info.src.resource, SWR_TILE_RESOLVED);
>> -      swr_store_dirty_resource(pipe, info.dst.resource, SWR_TILE_RESOLVED);
>> -      swr_fence_finish(pipe->screen, NULL, swr_screen(pipe->screen)->flush_fence, 0);
>> -      swr_resource_unused(info.src.resource);
>> -      swr_resource_unused(info.dst.resource);
>> 
>>       struct pipe_resource *src_resource = info.src.resource;
>>       struct pipe_resource *resolve_target =
>>          swr_resource(src_resource)->resolve_target;
>> 
>> -      /* Inline resolve samples into resolve target resource, then continue
>> -       * the blit. */
>> -      swr_do_msaa_resolve(src_resource, resolve_target);
>> -
>> -      /* The resolve target becomes the new source for the blit.  */
>> +      /* The resolve target becomes the new source for the blit. */
>>       info.src.resource = resolve_target;
>>    }
>> 
>> diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
>> index 04e613745a..5a36d0211a 100644
>> --- a/src/gallium/drivers/swr/swr_screen.cpp
>> +++ b/src/gallium/drivers/swr/swr_screen.cpp
>> @@ -891,6 +891,10 @@ swr_create_resolve_resource(struct pipe_screen *_screen,
>> 
>>       /* Attach it to the multisample resource */
>>       msaa_res->resolve_target = alt;
>> +
>> +      /* Hang resolve surface state off the multisample surface state to so
>> +       * StoreTiles knows where to resolve the surface. */
>> +      msaa_res->swr.pAuxBaseAddress =  (uint8_t *)&swr_resource(alt)->swr;
>>    }
>> 
>>    return true; /* success */
>> @@ -1009,14 +1013,10 @@ swr_flush_frontbuffer(struct pipe_screen *p_screen,
>>       SwrEndFrame(swr_context(pipe)->swrContext);
>>    }
>> 
>> -   /* Multisample surfaces need to be resolved before present */
>> +   /* Multisample resolved into resolve_target at flush with store_resouce */
>>    if (pipe && spr->swr.numSamples > 1) {
>>       struct pipe_resource *resolve_target = spr->resolve_target;
>> 
>> -      /* Do an inline surface resolve into the resolve target resource
>> -       * XXX: This works, just not optimal. Work on using a pipelined blit. */
>> -      swr_do_msaa_resolve(resource, resolve_target);
>> -
>>       /* Once resolved, copy into display target */
>>       SWR_SURFACE_STATE *resolve = &swr_resource(resolve_target)->swr;
>> 
>> --
>> 2.11.0
>> 
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev



More information about the mesa-dev mailing list