[Mesa-dev] [PATCH 3/3] swr/rast: fix memory paths for avx512 optimized avx/sse

Fri Jul 21 20:13:07 UTC 2017

Reviewed-by: Bruce Cherniak <bruce.cherniak at intel.com> 

> On Jul 20, 2017, at 5:09 PM, Tim Rowley <timothy.o.rowley at intel.com> wrote:
> 
> The source/destination pointers will not be AVX512 aligned, so use
> the unaligned load/store intrinsics.
> ---
> .../drivers/swr/rasterizer/common/simdlib_128_avx512.inl       | 10 +++++-----
> .../drivers/swr/rasterizer/common/simdlib_256_avx512.inl       | 10 +++++-----
> 2 files changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl
> index aaa74146ad..012f3105e9 100644
> --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl
> +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl
> @@ -294,12 +294,12 @@ SIMD_IWRAPPER_2_8(unpacklo_epi8);
> //-----------------------------------------------------------------------
> static SIMDINLINE Float SIMDCALL load_ps(float const *p)   // return *p    (loads SIMD width elements from memory)
> {
> -    return __conv(_mm512_maskz_load_ps(__mmask16(0xf), p));
> +    return __conv(_mm512_maskz_loadu_ps(__mmask16(0xf), p));
> }
> 
> static SIMDINLINE Integer SIMDCALL load_si(Integer const *p)  // return *p
> {
> -    return __conv(_mm512_maskz_load_epi32(__mmask16(0xf), p));
> +    return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xf), p));
> }
> 
> static SIMDINLINE Float SIMDCALL loadu_ps(float const *p)  // return *p    (same as load_ps but allows for unaligned mem)
> @@ -353,17 +353,17 @@ static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src)
> {
>     __mmask16 m = 0xf;
>     m = _mm512_mask_test_epi32_mask(m, __conv(mask), _mm512_set1_epi32(0x80000000));
> -    _mm512_mask_store_ps(p, m, __conv(src));
> +    _mm512_mask_storeu_ps(p, m, __conv(src));
> }
> 
> static SIMDINLINE void SIMDCALL store_ps(float *p, Float a)    // *p = a   (stores all elements contiguously in memory)
> {
> -    _mm512_mask_store_ps(p, __mmask16(0xf), __conv(a));
> +    _mm512_mask_storeu_ps(p, __mmask16(0xf), __conv(a));
> }
> 
> static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a)   // *p = a
> {
> -    _mm512_mask_store_epi32(p, __mmask16(0xf), __conv(a));
> +    _mm512_mask_storeu_epi32(p, __mmask16(0xf), __conv(a));
> }
> 
> //=======================================================================
> diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl
> index 5103bdafa2..a8d2a4b8bf 100644
> --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl
> +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl
> @@ -295,12 +295,12 @@ SIMD_IWRAPPER_2_8(unpacklo_epi8);
> //-----------------------------------------------------------------------
> static SIMDINLINE Float SIMDCALL load_ps(float const *p)   // return *p    (loads SIMD width elements from memory)
> {
> -    return __conv(_mm512_maskz_load_ps(__mmask16(0xff), p));
> +    return __conv(_mm512_maskz_loadu_ps(__mmask16(0xff), p));
> }
> 
> static SIMDINLINE Integer SIMDCALL load_si(Integer const *p)  // return *p
> {
> -    return __conv(_mm512_maskz_load_epi32(__mmask16(0xff), p));
> +    return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xff), p));
> }
> 
> static SIMDINLINE Float SIMDCALL loadu_ps(float const *p)  // return *p    (same as load_ps but allows for unaligned mem)
> @@ -354,17 +354,17 @@ static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src)
> {
>     __mmask16 m = 0xff;
>     m = _mm512_mask_test_epi32_mask(m, __conv(mask), _mm512_set1_epi32(0x80000000));
> -    _mm512_mask_store_ps(p, m, __conv(src));
> +    _mm512_mask_storeu_ps(p, m, __conv(src));
> }
> 
> static SIMDINLINE void SIMDCALL store_ps(float *p, Float a)    // *p = a   (stores all elements contiguously in memory)
> {
> -    _mm512_mask_store_ps(p, __mmask16(0xff), __conv(a));
> +    _mm512_mask_storeu_ps(p, __mmask16(0xff), __conv(a));
> }
> 
> static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a)   // *p = a
> {
> -    _mm512_mask_store_epi32(p, __mmask16(0xff), __conv(a));
> +    _mm512_mask_storeu_epi32(p, __mmask16(0xff), __conv(a));
> }
> 
> //=======================================================================
> -- 
> 2.11.0
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev