[Mesa-dev] [PATCH] gallivm: don't use pavg.b intrinsic on llvm >= 6.0
Jose Fonseca
jfonseca at vmware.com
Fri Dec 21 07:41:40 UTC 2018
On 21/12/2018 00:25, sroland at vmware.com wrote:
> From: Roland Scheidegger <sroland at vmware.com>
>
> This intrinsic disppeared with llvm 6.0, using it ends up in segfaults
> (due to llvm issuing call to NULL address in the jited shaders).
> Add code doing the same thing as the autoupgrade code in llvm so it
> can be matched and replaced back with a pavgb.
>
> While here, also improve lp_test_format, so it tests both with and without
> cache (as it was, it tested the cache versions only, whereas cache is
> actually disabled in llvmpipe, and in any case even with it enabled
> vertex and geometry shaders wouldn't use it). (Although at least for
> the unorm8 uncached fetch, the code is still quite different to what
> llvmpipe is using, since that would use unorm8x16 type, whereas
> the test code is using unorm8x4 type, hence disabling some intrinsic
> paths.)
>
> Fixes: 6f4083143bb8c478ccfcaef034d183d89b471993
> ---
> .../auxiliary/gallivm/lp_bld_format_s3tc.c | 55 +++++++++--
> src/gallium/drivers/llvmpipe/lp_test_format.c | 91 ++++++++++---------
> 2 files changed, 95 insertions(+), 51 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c b/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c
> index 2b143566f24..9561c349dad 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c
> @@ -457,6 +457,50 @@ color_expand_565_to_8888(struct gallivm_state *gallivm,
> }
>
>
> +/*
> + * Average two byte vectors. (Will always round up.)
> + */
> +static LLVMValueRef
> +lp_build_pavgb(struct lp_build_context *bld8,
> + LLVMValueRef v0,
> + LLVMValueRef v1)
> +{
> + struct gallivm_state *gallivm = bld8->gallivm;
> + LLVMBuilderRef builder = gallivm->builder;
> + assert(bld8->type.width == 8);
> + assert(bld8->type.length == 16 || bld8->type.length == 32);
> + if (HAVE_LLVM < 0x0600) {
> + LLVMValueRef intrargs[2];
> + char *intr_name = bld8->type.length == 32 ? "llvm.x86.avx2.pavg.b" :
> + "llvm.x86.sse2.pavg.b";
> + intrargs[0] = v0;
> + intrargs[1] = v1;
> + return lp_build_intrinsic(builder, intr_name,
> + bld8->vec_type, intrargs, 2, 0);
> + } else {
> + /*
> + * Must match llvm's autoupgrade of pavg.b intrinsic to be useful.
> + * You better hope the backend code manages to detect the pattern, and
> + * the pattern doesn't change there...
> + */
> + struct lp_type type_ext = bld8->type;
> + LLVMTypeRef vec_type_ext;
> + LLVMValueRef res;
> + LLVMValueRef ext_one;
> + type_ext.width = 16;
> + vec_type_ext = lp_build_vec_type(gallivm, type_ext);
> + ext_one = lp_build_const_vec(gallivm, type_ext, 1);
> +
> + v0 = LLVMBuildZExt(builder, v0, vec_type_ext, "");
> + v1 = LLVMBuildZExt(builder, v1, vec_type_ext, "");
> + res = LLVMBuildAdd(builder, v0, v1, "");
> + res = LLVMBuildAdd(builder, res, ext_one, "");
> + res = LLVMBuildLShr(builder, res, ext_one, "");
> + res = LLVMBuildTrunc(builder, res, bld8->vec_type, "");
> + return res;
> + }
> +}
> +
> /**
> * Calculate 1/3(v1-v0) + v0
> * and 2*1/3(v1-v0) + v0
> @@ -602,13 +646,7 @@ s3tc_dxt1_full_to_rgba_aos(struct gallivm_state *gallivm,
> */
> if ((util_cpu_caps.has_sse2 && n == 4) ||
> (util_cpu_caps.has_avx2 && n == 8)) {
> - LLVMValueRef intrargs[2];
> - char *intr_name = n == 8 ? "llvm.x86.avx2.pavg.b" :
> - "llvm.x86.sse2.pavg.b";
> - intrargs[0] = colors0;
> - intrargs[1] = colors1;
> - color2_2 = lp_build_intrinsic(builder, intr_name,
> - bld8.vec_type, intrargs, 2, 0);
> + color2_2 = lp_build_pavgb(&bld8, colors0, colors1);
> color2_2 = LLVMBuildBitCast(builder, color2_2, bld32.vec_type, "");
> }
> else {
> @@ -1278,8 +1316,7 @@ s3tc_decode_block_dxt1(struct gallivm_state *gallivm,
> /* same interleave as for lerp23 - correct result in 2nd element */
> intrargs[1] = lp_build_interleave2(gallivm, type32, color01, color01, 0);
> intrargs[1] = LLVMBuildBitCast(builder, intrargs[1], bld8.vec_type, "");
> - color2_2 = lp_build_intrinsic(builder, "llvm.x86.sse2.pavg.b",
> - bld8.vec_type, intrargs, 2, 0);
> + color2_2 = lp_build_pavgb(&bld8, intrargs[0], intrargs[1]);
> }
> else {
> LLVMValueRef v01, v0, v1, vhalf;
> diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
> index a8aa33d8ae9..885d886cfa9 100644
> --- a/src/gallium/drivers/llvmpipe/lp_test_format.c
> +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
> @@ -44,8 +44,6 @@
>
> #include "lp_test.h"
>
> -#define USE_TEXTURE_CACHE 1
> -
> static struct lp_build_format_cache *cache_ptr;
>
> void
> @@ -80,7 +78,8 @@ typedef void
> static LLVMValueRef
> add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
> const struct util_format_description *desc,
> - struct lp_type type)
> + struct lp_type type,
> + unsigned use_cache)
> {
> char name[256];
> LLVMContextRef context = gallivm->context;
> @@ -114,7 +113,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
> i = LLVMGetParam(func, 2);
> j = LLVMGetParam(func, 3);
>
> - if (cache_ptr) {
> + if (use_cache) {
> cache = LLVMGetParam(func, 4);
> }
>
> @@ -137,7 +136,8 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
> PIPE_ALIGN_STACK
> static boolean
> test_format_float(unsigned verbose, FILE *fp,
> - const struct util_format_description *desc)
> + const struct util_format_description *desc,
> + unsigned use_cache)
> {
> LLVMContextRef context;
> struct gallivm_state *gallivm;
> @@ -152,7 +152,8 @@ test_format_float(unsigned verbose, FILE *fp,
> context = LLVMContextCreate();
> gallivm = gallivm_create("test_module_float", context);
>
> - fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_float32_vec4_type());
> + fetch = add_fetch_rgba_test(gallivm, verbose, desc,
> + lp_float32_vec4_type(), use_cache);
>
> gallivm_compile_module(gallivm);
>
> @@ -181,7 +182,7 @@ test_format_float(unsigned verbose, FILE *fp,
>
> memset(unpacked, 0, sizeof unpacked);
>
> - fetch_ptr(unpacked, packed, j, i, cache_ptr);
> + fetch_ptr(unpacked, packed, j, i, use_cache ? cache_ptr : NULL);
>
> for(k = 0; k < 4; ++k) {
> if (util_double_inf_sign(test->unpacked[i][j][k]) != util_inf_sign(unpacked[k])) {
> @@ -236,7 +237,8 @@ test_format_float(unsigned verbose, FILE *fp,
> PIPE_ALIGN_STACK
> static boolean
> test_format_unorm8(unsigned verbose, FILE *fp,
> - const struct util_format_description *desc)
> + const struct util_format_description *desc,
> + unsigned use_cache)
> {
> LLVMContextRef context;
> struct gallivm_state *gallivm;
> @@ -251,7 +253,8 @@ test_format_unorm8(unsigned verbose, FILE *fp,
> context = LLVMContextCreate();
> gallivm = gallivm_create("test_module_unorm8", context);
>
> - fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_unorm8_vec4_type());
> + fetch = add_fetch_rgba_test(gallivm, verbose, desc,
> + lp_unorm8_vec4_type(), use_cache);
>
> gallivm_compile_module(gallivm);
>
> @@ -280,7 +283,7 @@ test_format_unorm8(unsigned verbose, FILE *fp,
>
> memset(unpacked, 0, sizeof unpacked);
>
> - fetch_ptr(unpacked, packed, j, i, cache_ptr);
> + fetch_ptr(unpacked, packed, j, i, use_cache ? cache_ptr : NULL);
>
> match = TRUE;
> for(k = 0; k < 4; ++k) {
> @@ -335,15 +338,16 @@ test_format_unorm8(unsigned verbose, FILE *fp,
>
> static boolean
> test_one(unsigned verbose, FILE *fp,
> - const struct util_format_description *format_desc)
> + const struct util_format_description *format_desc,
> + unsigned use_cache)
> {
> boolean success = TRUE;
>
> - if (!test_format_float(verbose, fp, format_desc)) {
> + if (!test_format_float(verbose, fp, format_desc, use_cache)) {
> success = FALSE;
> }
>
> - if (!test_format_unorm8(verbose, fp, format_desc)) {
> + if (!test_format_unorm8(verbose, fp, format_desc, use_cache)) {
> success = FALSE;
> }
>
> @@ -356,49 +360,52 @@ test_all(unsigned verbose, FILE *fp)
> {
> enum pipe_format format;
> boolean success = TRUE;
> + unsigned use_cache;
>
> -#if USE_TEXTURE_CACHE
> cache_ptr = align_malloc(sizeof(struct lp_build_format_cache), 16);
> -#endif
>
> - for (format = 1; format < PIPE_FORMAT_COUNT; ++format) {
> - const struct util_format_description *format_desc;
> + for (use_cache = 0; use_cache < 2; use_cache++) {
> + for (format = 1; format < PIPE_FORMAT_COUNT; ++format) {
> + const struct util_format_description *format_desc;
>
> - format_desc = util_format_description(format);
> - if (!format_desc) {
> - continue;
> - }
> + format_desc = util_format_description(format);
> + if (!format_desc) {
> + continue;
> + }
>
> + /*
> + * TODO: test more
> + */
>
> - /*
> - * TODO: test more
> - */
> + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
> + continue;
> + }
>
> - if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
> - continue;
> - }
> + if (util_format_is_pure_integer(format))
> + continue;
>
> - if (util_format_is_pure_integer(format))
> - continue;
> + /* only have util fetch func for etc1 */
> + if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
> + format != PIPE_FORMAT_ETC1_RGB8) {
> + continue;
> + }
>
> - /* only have util fetch func for etc1 */
> - if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
> - format != PIPE_FORMAT_ETC1_RGB8) {
> - continue;
> - }
> + /* missing fetch funcs */
> + if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
> + continue;
> + }
>
> - /* missing fetch funcs */
> - if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
> - continue;
> - }
> + /* only test twice with formats which can use cache */
> + if (format_desc->layout != UTIL_FORMAT_LAYOUT_S3TC && use_cache) {
> + continue;
> + }
>
> - if (!test_one(verbose, fp, format_desc)) {
> - success = FALSE;
> + if (!test_one(verbose, fp, format_desc, use_cache)) {
> + success = FALSE;
> + }
> }
> }
> -#if USE_TEXTURE_CACHE
> align_free(cache_ptr);
> -#endif
>
> return success;
> }
>
Reviwed-by: Jose Fonseca <jfonseca at vmware.com>
More information about the mesa-dev
mailing list