[Mesa-dev] [PATCH] gallivm: don't use pavg.b intrinsic on llvm >= 6.0

Jose Fonseca jfonseca at vmware.com
Fri Dec 21 07:41:40 UTC 2018


On 21/12/2018 00:25, sroland at vmware.com wrote:
> From: Roland Scheidegger <sroland at vmware.com>
> 
> This intrinsic disppeared with llvm 6.0, using it ends up in segfaults
> (due to llvm issuing call to NULL address in the jited shaders).
> Add code doing the same thing as the autoupgrade code in llvm so it
> can be matched and replaced back with a pavgb.
> 
> While here, also improve lp_test_format, so it tests both with and without
> cache (as it was, it tested the cache versions only, whereas cache is
> actually disabled in llvmpipe, and in any case even with it enabled
> vertex and geometry shaders wouldn't use it). (Although at least for
> the unorm8 uncached fetch, the code is still quite different to what
> llvmpipe is using, since that would use unorm8x16 type, whereas
> the test code is using unorm8x4 type, hence disabling some intrinsic
> paths.)
> 
> Fixes: 6f4083143bb8c478ccfcaef034d183d89b471993
> ---
>   .../auxiliary/gallivm/lp_bld_format_s3tc.c    | 55 +++++++++--
>   src/gallium/drivers/llvmpipe/lp_test_format.c | 91 ++++++++++---------
>   2 files changed, 95 insertions(+), 51 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c b/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c
> index 2b143566f24..9561c349dad 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c
> @@ -457,6 +457,50 @@ color_expand_565_to_8888(struct gallivm_state *gallivm,
>   }
>   
>   
> +/*
> + * Average two byte vectors. (Will always round up.)
> + */
> +static LLVMValueRef
> +lp_build_pavgb(struct lp_build_context *bld8,
> +               LLVMValueRef v0,
> +               LLVMValueRef v1)
> +{
> +   struct gallivm_state *gallivm = bld8->gallivm;
> +   LLVMBuilderRef builder = gallivm->builder;
> +   assert(bld8->type.width == 8);
> +   assert(bld8->type.length == 16 || bld8->type.length == 32);
> +   if (HAVE_LLVM < 0x0600) {
> +      LLVMValueRef intrargs[2];
> +      char *intr_name = bld8->type.length == 32 ? "llvm.x86.avx2.pavg.b" :
> +                                                  "llvm.x86.sse2.pavg.b";
> +      intrargs[0] = v0;
> +      intrargs[1] = v1;
> +      return lp_build_intrinsic(builder, intr_name,
> +                                bld8->vec_type, intrargs, 2, 0);
> +   } else {
> +      /*
> +       * Must match llvm's autoupgrade of pavg.b intrinsic to be useful.
> +       * You better hope the backend code manages to detect the pattern, and
> +       * the pattern doesn't change there...
> +       */
> +      struct lp_type type_ext = bld8->type;
> +      LLVMTypeRef vec_type_ext;
> +      LLVMValueRef res;
> +      LLVMValueRef ext_one;
> +      type_ext.width = 16;
> +      vec_type_ext = lp_build_vec_type(gallivm, type_ext);
> +      ext_one = lp_build_const_vec(gallivm, type_ext, 1);
> +
> +      v0 = LLVMBuildZExt(builder, v0, vec_type_ext, "");
> +      v1 = LLVMBuildZExt(builder, v1, vec_type_ext, "");
> +      res = LLVMBuildAdd(builder, v0, v1, "");
> +      res = LLVMBuildAdd(builder, res, ext_one, "");
> +      res = LLVMBuildLShr(builder, res, ext_one, "");
> +      res = LLVMBuildTrunc(builder, res, bld8->vec_type, "");
> +      return res;
> +   }
> +}
> +
>   /**
>    * Calculate 1/3(v1-v0) + v0
>    * and 2*1/3(v1-v0) + v0
> @@ -602,13 +646,7 @@ s3tc_dxt1_full_to_rgba_aos(struct gallivm_state *gallivm,
>          */
>         if ((util_cpu_caps.has_sse2 && n == 4) ||
>             (util_cpu_caps.has_avx2 && n == 8)) {
> -         LLVMValueRef intrargs[2];
> -         char *intr_name = n == 8 ? "llvm.x86.avx2.pavg.b" :
> -                                    "llvm.x86.sse2.pavg.b";
> -         intrargs[0] = colors0;
> -         intrargs[1] = colors1;
> -         color2_2 = lp_build_intrinsic(builder, intr_name,
> -                                       bld8.vec_type, intrargs, 2, 0);
> +         color2_2 = lp_build_pavgb(&bld8, colors0, colors1);
>            color2_2 = LLVMBuildBitCast(builder, color2_2, bld32.vec_type, "");
>         }
>         else {
> @@ -1278,8 +1316,7 @@ s3tc_decode_block_dxt1(struct gallivm_state *gallivm,
>            /* same interleave as for lerp23 - correct result in 2nd element */
>            intrargs[1] = lp_build_interleave2(gallivm, type32, color01, color01, 0);
>            intrargs[1] = LLVMBuildBitCast(builder, intrargs[1], bld8.vec_type, "");
> -         color2_2 = lp_build_intrinsic(builder, "llvm.x86.sse2.pavg.b",
> -                                       bld8.vec_type, intrargs, 2, 0);
> +         color2_2 = lp_build_pavgb(&bld8, intrargs[0], intrargs[1]);
>         }
>         else {
>            LLVMValueRef v01, v0, v1, vhalf;
> diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
> index a8aa33d8ae9..885d886cfa9 100644
> --- a/src/gallium/drivers/llvmpipe/lp_test_format.c
> +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
> @@ -44,8 +44,6 @@
>   
>   #include "lp_test.h"
>   
> -#define USE_TEXTURE_CACHE 1
> -
>   static struct lp_build_format_cache *cache_ptr;
>   
>   void
> @@ -80,7 +78,8 @@ typedef void
>   static LLVMValueRef
>   add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
>                       const struct util_format_description *desc,
> -                    struct lp_type type)
> +                    struct lp_type type,
> +                    unsigned use_cache)
>   {
>      char name[256];
>      LLVMContextRef context = gallivm->context;
> @@ -114,7 +113,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
>      i = LLVMGetParam(func, 2);
>      j = LLVMGetParam(func, 3);
>   
> -   if (cache_ptr) {
> +   if (use_cache) {
>         cache = LLVMGetParam(func, 4);
>      }
>   
> @@ -137,7 +136,8 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
>   PIPE_ALIGN_STACK
>   static boolean
>   test_format_float(unsigned verbose, FILE *fp,
> -                  const struct util_format_description *desc)
> +                  const struct util_format_description *desc,
> +                  unsigned use_cache)
>   {
>      LLVMContextRef context;
>      struct gallivm_state *gallivm;
> @@ -152,7 +152,8 @@ test_format_float(unsigned verbose, FILE *fp,
>      context = LLVMContextCreate();
>      gallivm = gallivm_create("test_module_float", context);
>   
> -   fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_float32_vec4_type());
> +   fetch = add_fetch_rgba_test(gallivm, verbose, desc,
> +                               lp_float32_vec4_type(), use_cache);
>   
>      gallivm_compile_module(gallivm);
>   
> @@ -181,7 +182,7 @@ test_format_float(unsigned verbose, FILE *fp,
>   
>                  memset(unpacked, 0, sizeof unpacked);
>   
> -               fetch_ptr(unpacked, packed, j, i, cache_ptr);
> +               fetch_ptr(unpacked, packed, j, i, use_cache ? cache_ptr : NULL);
>   
>                  for(k = 0; k < 4; ++k) {
>                     if (util_double_inf_sign(test->unpacked[i][j][k]) != util_inf_sign(unpacked[k])) {
> @@ -236,7 +237,8 @@ test_format_float(unsigned verbose, FILE *fp,
>   PIPE_ALIGN_STACK
>   static boolean
>   test_format_unorm8(unsigned verbose, FILE *fp,
> -                   const struct util_format_description *desc)
> +                   const struct util_format_description *desc,
> +                   unsigned use_cache)
>   {
>      LLVMContextRef context;
>      struct gallivm_state *gallivm;
> @@ -251,7 +253,8 @@ test_format_unorm8(unsigned verbose, FILE *fp,
>      context = LLVMContextCreate();
>      gallivm = gallivm_create("test_module_unorm8", context);
>   
> -   fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_unorm8_vec4_type());
> +   fetch = add_fetch_rgba_test(gallivm, verbose, desc,
> +                               lp_unorm8_vec4_type(), use_cache);
>   
>      gallivm_compile_module(gallivm);
>   
> @@ -280,7 +283,7 @@ test_format_unorm8(unsigned verbose, FILE *fp,
>   
>                  memset(unpacked, 0, sizeof unpacked);
>   
> -               fetch_ptr(unpacked, packed, j, i, cache_ptr);
> +               fetch_ptr(unpacked, packed, j, i, use_cache ? cache_ptr : NULL);
>   
>                  match = TRUE;
>                  for(k = 0; k < 4; ++k) {
> @@ -335,15 +338,16 @@ test_format_unorm8(unsigned verbose, FILE *fp,
>   
>   static boolean
>   test_one(unsigned verbose, FILE *fp,
> -         const struct util_format_description *format_desc)
> +         const struct util_format_description *format_desc,
> +         unsigned use_cache)
>   {
>      boolean success = TRUE;
>   
> -   if (!test_format_float(verbose, fp, format_desc)) {
> +   if (!test_format_float(verbose, fp, format_desc, use_cache)) {
>        success = FALSE;
>      }
>   
> -   if (!test_format_unorm8(verbose, fp, format_desc)) {
> +   if (!test_format_unorm8(verbose, fp, format_desc, use_cache)) {
>        success = FALSE;
>      }
>   
> @@ -356,49 +360,52 @@ test_all(unsigned verbose, FILE *fp)
>   {
>      enum pipe_format format;
>      boolean success = TRUE;
> +   unsigned use_cache;
>   
> -#if USE_TEXTURE_CACHE
>      cache_ptr = align_malloc(sizeof(struct lp_build_format_cache), 16);
> -#endif
>   
> -   for (format = 1; format < PIPE_FORMAT_COUNT; ++format) {
> -      const struct util_format_description *format_desc;
> +   for (use_cache = 0; use_cache < 2; use_cache++) {
> +      for (format = 1; format < PIPE_FORMAT_COUNT; ++format) {
> +         const struct util_format_description *format_desc;
>   
> -      format_desc = util_format_description(format);
> -      if (!format_desc) {
> -         continue;
> -      }
> +         format_desc = util_format_description(format);
> +         if (!format_desc) {
> +            continue;
> +         }
>   
> +         /*
> +          * TODO: test more
> +          */
>   
> -      /*
> -       * TODO: test more
> -       */
> +         if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
> +            continue;
> +         }
>   
> -      if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
> -         continue;
> -      }
> +         if (util_format_is_pure_integer(format))
> +	    continue;
>   
> -      if (util_format_is_pure_integer(format))
> -	 continue;
> +         /* only have util fetch func for etc1 */
> +         if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
> +             format != PIPE_FORMAT_ETC1_RGB8) {
> +            continue;
> +         }
>   
> -      /* only have util fetch func for etc1 */
> -      if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
> -          format != PIPE_FORMAT_ETC1_RGB8) {
> -         continue;
> -      }
> +         /* missing fetch funcs */
> +         if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
> +            continue;
> +         }
>   
> -      /* missing fetch funcs */
> -      if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
> -         continue;
> -      }
> +         /* only test twice with formats which can use cache */
> +         if (format_desc->layout != UTIL_FORMAT_LAYOUT_S3TC && use_cache) {
> +            continue;
> +         }
>   
> -      if (!test_one(verbose, fp, format_desc)) {
> -           success = FALSE;
> +         if (!test_one(verbose, fp, format_desc, use_cache)) {
> +              success = FALSE;
> +         }
>         }
>      }
> -#if USE_TEXTURE_CACHE
>      align_free(cache_ptr);
> -#endif
>   
>      return success;
>   }
> 

Reviwed-by: Jose Fonseca <jfonseca at vmware.com>


More information about the mesa-dev mailing list