[Mesa-dev] [Bug 111496] dEQP-GLES31.functional.shaders.builtin_functions.integer.umulextended.uint_highp_vertex fails with bad intrinsic
bugzilla-daemon at freedesktop.org
bugzilla-daemon at freedesktop.org
Tue Aug 27 07:32:44 UTC 2019
https://bugs.freedesktop.org/show_bug.cgi?id=111496
Bug ID: 111496
Summary: dEQP-GLES31.functional.shaders.builtin_functions.integer.umulextended.uint_highp_vertex fails with bad intrinsic
Product: Mesa
Version: unspecified
Hardware: Other
OS: All
Status: NEW
Severity: not set
Priority: not set
Component: Drivers/Gallium/llvmpipe
Assignee: mesa-dev at lists.freedesktop.org
Reporter: airlied at freedesktop.org
QA Contact: mesa-dev at lists.freedesktop.org
Debug output below:
llvm (version 0x800) found no intrinsic for llvm.x86.avx2.pmulu.dq, going to crash...
This is on a Skylake CPU.
llvmpipe: Fragment shader #131 variant #0:
FRAG
DCL IN[0].xy, GENERIC[9], CONSTANT
DCL OUT[0], COLOR
DCL OUT[1], COLOR[1]
0: MOV OUT[1].x, IN[0].xxxx
1: MOV OUT[0].x, IN[0].yyyy
2: END
fs variant 0x1f0f0bc:
cbuf_format[0] = PIPE_FORMAT_R32_UINT
cbuf_format[1] = PIPE_FORMAT_R32_UINT
blend.colormask = 0x1
variant->opaque = 0
; ModuleID = 'fs131_variant0'
source_filename = "fs131_variant0"
target datalayout = "e-p:64:64:64-i64:64:64-a0:0:64-s0:64:64"
; Function Attrs: nounwind readnone speculatable
declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>)
#0
; Function Attrs: nounwind readnone
declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) #1
; Function Attrs: nounwind readnone
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) #1
define void @fs131_variant0_partial({ [16 x float*], [16 x i32], [128 x { i32,
i32, i32, i8*, [14 x i32], [14 x i32], i32, i32, [14 x i32] }], [32 x { float,
float, float, [4 x float] }], [32 x { i32, i32, i32, i8*, i32, i32 }], float,
i32, i32, i8*, float*, { float, float }*, [16 x i32*], [16 x i32] }* noalias
%context, i32 %x, i32 %y, i32, float* noalias %a0, float* noalias %dadx, float*
noalias %dady, <16 x i8>** noalias %color_ptr_ptr, i8* noalias %depth, i32
%mask_input, { { [2048 x i32], [128 x i64] }*, i64, i64, i32 }* noalias
%thread_data, i32* noalias %stride_ptr, i32 %depth_stride) {
entry:
%output16 = alloca <8 x float>
%output15 = alloca <8 x float>
%output14 = alloca <8 x float>
%output13 = alloca <8 x float>
%output12 = alloca <8 x float>
%output11 = alloca <8 x float>
%output10 = alloca <8 x float>
%output = alloca <8 x float>
%looplimiter = alloca i32
%execution_mask = alloca <8 x i32>
%color9 = alloca <8 x float>, i32 2
%color8 = alloca <8 x float>, i32 2
%color7 = alloca <8 x float>, i32 2
%color6 = alloca <8 x float>, i32 2
%color5 = alloca <8 x float>, i32 2
%color4 = alloca <8 x float>, i32 2
%color3 = alloca <8 x float>, i32 2
%color = alloca <8 x float>, i32 2
%loop_counter = alloca i32
%1 = alloca <8 x float>, i32 2
%2 = alloca <8 x float>, i32 2
%mask_store = alloca <8 x i32>, i32 2
%thread_data.invocs_ptr = getelementptr { { [2048 x i32], [128 x i64] }*,
i64, i64, i32 }, { { [2048 x i32], [128 x i64] }*, i64, i64, i32 }*
%thread_data, i32 0, i32 2
%3 = load i64, i64* %thread_data.invocs_ptr
%invoc_count = add i64 %3, 1
store i64 %invoc_count, i64* %thread_data.invocs_ptr
%4 = sitofp i32 %x to float
%5 = sitofp i32 %y to float
%6 = getelementptr <8 x float>, <8 x float>* %2, i32 0
store <8 x float> <float 0.000000e+00, float 1.000000e+00, float
0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float
2.000000e+00, float 3.000000e+00>, <8 x float>* %6
%7 = getelementptr <8 x float>, <8 x float>* %1, i32 0
store <8 x float> <float 0.000000e+00, float 0.000000e+00, float
1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float
1.000000e+00, float 1.000000e+00>, <8 x float>* %7
%8 = getelementptr <8 x float>, <8 x float>* %2, i32 1
store <8 x float> <float 0.000000e+00, float 1.000000e+00, float
0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float
2.000000e+00, float 3.000000e+00>, <8 x float>* %8
%9 = getelementptr <8 x float>, <8 x float>* %1, i32 1
store <8 x float> <float 2.000000e+00, float 2.000000e+00, float
3.000000e+00, float 3.000000e+00, float 2.000000e+00, float 2.000000e+00, float
3.000000e+00, float 3.000000e+00>, <8 x float>* %9
%10 = getelementptr float, float* %dadx, i32 0
%11 = bitcast float* %10 to <4 x float>*
%pos.x.dadxaos = load <4 x float>, <4 x float>* %11
%12 = getelementptr float, float* %dady, i32 0
%13 = bitcast float* %12 to <4 x float>*
%pos.x.dadyaos = load <4 x float>, <4 x float>* %13
%14 = getelementptr float, float* %a0, i32 0
%15 = bitcast float* %14 to <4 x float>*
%pos.x.a0aos = load <4 x float>, <4 x float>* %15
%16 = getelementptr float, float* %a0, i32 4
%17 = bitcast float* %16 to <4 x float>*
%input0.x.a0aos = load <4 x float>, <4 x float>* %17
%mask_ptr = getelementptr <8 x i32>, <8 x i32>* %mask_store, i32 0
%18 = lshr i32 %mask_input, 0
%19 = insertelement <8 x i32> undef, i32 %18, i32 0
%20 = shufflevector <8 x i32> %19, <8 x i32> undef, <8 x i32> zeroinitializer
%21 = and <8 x i32> %20, <i32 1, i32 2, i32 16, i32 32, i32 4, i32 8, i32 64,
i32 128>
%22 = icmp eq <8 x i32> %21, <i32 1, i32 2, i32 16, i32 32, i32 4, i32 8, i32
64, i32 128>
%23 = sext <8 x i1> %22 to <8 x i32>
store <8 x i32> %23, <8 x i32>* %mask_ptr
%mask_ptr1 = getelementptr <8 x i32>, <8 x i32>* %mask_store, i32 1
%24 = lshr i32 %mask_input, 8
%25 = insertelement <8 x i32> undef, i32 %24, i32 0
%26 = shufflevector <8 x i32> %25, <8 x i32> undef, <8 x i32> zeroinitializer
%27 = and <8 x i32> %26, <i32 1, i32 2, i32 16, i32 32, i32 4, i32 8, i32 64,
i32 128>
%28 = icmp eq <8 x i32> %27, <i32 1, i32 2, i32 16, i32 32, i32 4, i32 8, i32
64, i32 128>
%29 = sext <8 x i1> %28 to <8 x i32>
store <8 x i32> %29, <8 x i32>* %mask_ptr1
%context.stencil_ref_front_ptr = getelementptr { [16 x float*], [16 x i32],
[128 x { i32, i32, i32, i8*, [14 x i32], [14 x i32], i32, i32, [14 x i32] }],
[32 x { float, float, float, [4 x float] }], [32 x { i32, i32, i32, i8*, i32,
i32 }], float, i32, i32, i8*, float*, { float, float }*, [16 x i32*], [16 x
i32] }, { [16 x float*], [16 x i32], [128 x { i32, i32, i32, i8*, [14 x i32],
[14 x i32], i32, i32, [14 x i32] }], [32 x { float, float, float, [4 x float]
}], [32 x { i32, i32, i32, i8*, i32, i32 }], float, i32, i32, i8*, float*, {
float, float }*, [16 x i32*], [16 x i32] }* %context, i32 0, i32 6
%context.stencil_ref_front = load i32, i32* %context.stencil_ref_front_ptr
%context.stencil_ref_back_ptr = getelementptr { [16 x float*], [16 x i32],
[128 x { i32, i32, i32, i8*, [14 x i32], [14 x i32], i32, i32, [14 x i32] }],
[32 x { float, float, float, [4 x float] }], [32 x { i32, i32, i32, i8*, i32,
i32 }], float, i32, i32, i8*, float*, { float, float }*, [16 x i32*], [16 x
i32] }, { [16 x float*], [16 x i32], [128 x { i32, i32, i32, i8*, [14 x i32],
[14 x i32], i32, i32, [14 x i32] }], [32 x { float, float, float, [4 x float]
}], [32 x { i32, i32, i32, i8*, i32, i32 }], float, i32, i32, i8*, float*, {
float, float }*, [16 x i32*], [16 x i32] }* %context, i32 0, i32 7
%context.stencil_ref_back = load i32, i32* %context.stencil_ref_back_ptr
%30 = insertelement <8 x i32> undef, i32 %context.stencil_ref_front, i32 0
%31 = shufflevector <8 x i32> %30, <8 x i32> undef, <8 x i32> zeroinitializer
%32 = insertelement <8 x i32> undef, i32 %context.stencil_ref_back, i32 0
%33 = shufflevector <8 x i32> %32, <8 x i32> undef, <8 x i32> zeroinitializer
%context.constants_ptr = getelementptr { [16 x float*], [16 x i32], [128 x {
i32, i32, i32, i8*, [14 x i32], [14 x i32], i32, i32, [14 x i32] }], [32 x {
float, float, float, [4 x float] }], [32 x { i32, i32, i32, i8*, i32, i32 }],
float, i32, i32, i8*, float*, { float, float }*, [16 x i32*], [16 x i32] }, {
[16 x float*], [16 x i32], [128 x { i32, i32, i32, i8*, [14 x i32], [14 x i32],
i32, i32, [14 x i32] }], [32 x { float, float, float, [4 x float] }], [32 x {
i32, i32, i32, i8*, i32, i32 }], float, i32, i32, i8*, float*, { float, float
}*, [16 x i32*], [16 x i32] }* %context, i32 0, i32 0
%context.num_constants_ptr = getelementptr { [16 x float*], [16 x i32], [128
x { i32, i32, i32, i8*, [14 x i32], [14 x i32], i32, i32, [14 x i32] }], [32 x
{ float, float, float, [4 x float] }], [32 x { i32, i32, i32, i8*, i32, i32 }],
float, i32, i32, i8*, float*, { float, float }*, [16 x i32*], [16 x i32] }, {
[16 x float*], [16 x i32], [128 x { i32, i32, i32, i8*, [14 x i32], [14 x i32],
i32, i32, [14 x i32] }], [32 x { float, float, float, [4 x float] }], [32 x {
i32, i32, i32, i8*, i32, i32 }], float, i32, i32, i8*, float*, { float, float
}*, [16 x i32*], [16 x i32] }* %context, i32 0, i32 1
%context.ssbos_ptr = getelementptr { [16 x float*], [16 x i32], [128 x { i32,
i32, i32, i8*, [14 x i32], [14 x i32], i32, i32, [14 x i32] }], [32 x { float,
float, float, [4 x float] }], [32 x { i32, i32, i32, i8*, i32, i32 }], float,
i32, i32, i8*, float*, { float, float }*, [16 x i32*], [16 x i32] }, { [16 x
float*], [16 x i32], [128 x { i32, i32, i32, i8*, [14 x i32], [14 x i32], i32,
i32, [14 x i32] }], [32 x { float, float, float, [4 x float] }], [32 x { i32,
i32, i32, i8*, i32, i32 }], float, i32, i32, i8*, float*, { float, float }*,
[16 x i32*], [16 x i32] }* %context, i32 0, i32 11
%context.num_ssbos_ptr = getelementptr { [16 x float*], [16 x i32], [128 x {
i32, i32, i32, i8*, [14 x i32], [14 x i32], i32, i32, [14 x i32] }], [32 x {
float, float, float, [4 x float] }], [32 x { i32, i32, i32, i8*, i32, i32 }],
float, i32, i32, i8*, float*, { float, float }*, [16 x i32*], [16 x i32] }, {
[16 x float*], [16 x i32], [128 x { i32, i32, i32, i8*, [14 x i32], [14 x i32],
i32, i32, [14 x i32] }], [32 x { float, float, float, [4 x float] }], [32 x {
i32, i32, i32, i8*, i32, i32 }], float, i32, i32, i8*, float*, { float, float
}*, [16 x i32*], [16 x i32] }* %context, i32 0, i32 12
store i32 0, i32* %loop_counter
store i32 0, i32* %loop_counter
br label %loop_begin
loop_begin: ; preds = %skip, %entry
%34 = load i32, i32* %loop_counter
%35 = icmp ult i32 %34, 2
br i1 %35, label %loop_body, label %loop_exit
loop_body: ; preds = %loop_begin
%mask_ptr2 = getelementptr <8 x i32>, <8 x i32>* %mask_store, i32 %34
%36 = load <8 x i32>, <8 x i32>* %mask_ptr2
store <8 x i32> zeroinitializer, <8 x i32>* %execution_mask
store <8 x i32> %36, <8 x i32>* %execution_mask
%37 = load <8 x i32>, <8 x i32>* %execution_mask
%38 = bitcast <8 x i32> %37 to i256
%39 = icmp eq i256 %38, 0
br i1 %39, label %skip, label %40
; <label>:40: ; preds = %loop_body
%41 = getelementptr <8 x float>, <8 x float>* %2, i32 %34
%42 = load <8 x float>, <8 x float>* %41
%43 = getelementptr <8 x float>, <8 x float>* %1, i32 %34
%44 = load <8 x float>, <8 x float>* %43
%45 = insertelement <8 x float> undef, float %4, i32 0
%46 = shufflevector <8 x float> %45, <8 x float> undef, <8 x i32>
zeroinitializer
%47 = fadd <8 x float> %42, %46
%48 = insertelement <8 x float> undef, float %5, i32 0
%49 = shufflevector <8 x float> %48, <8 x float> undef, <8 x i32>
zeroinitializer
%50 = fadd <8 x float> %44, %49
%51 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> <float 1.000000e+00,
float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00,
float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <8 x float> %47,
<8 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float
5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float
5.000000e-01>) #2
%52 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> zeroinitializer, <8 x
float> %50, <8 x float> %51) #2
%53 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> zeroinitializer, <8 x
float> %47, <8 x float> <float 5.000000e-01, float 5.000000e-01, float
5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float
5.000000e-01, float 5.000000e-01>) #2
%54 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> <float 1.000000e+00,
float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00,
float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <8 x float> %50,
<8 x float> %53) #2
%55 = shufflevector <4 x float> %pos.x.dadxaos, <4 x float> undef, <8 x i32>
<i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
%56 = shufflevector <4 x float> %pos.x.dadyaos, <4 x float> undef, <8 x i32>
<i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
%57 = shufflevector <4 x float> %pos.x.a0aos, <4 x float> undef, <8 x i32>
<i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
%58 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %55, <8 x float> %47,
<8 x float> %57) #2
%59 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %56, <8 x float> %50,
<8 x float> %58) #2
%60 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %59, <8 x float>
<float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float
1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float
1.000000e+00>) #2
%61 = shufflevector <4 x float> %pos.x.dadxaos, <4 x float> undef, <8 x i32>
<i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
%62 = shufflevector <4 x float> %pos.x.dadyaos, <4 x float> undef, <8 x i32>
<i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
%63 = shufflevector <4 x float> %pos.x.a0aos, <4 x float> undef, <8 x i32>
<i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
%64 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %61, <8 x float> %47,
<8 x float> %63) #2
%65 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %62, <8 x float> %50,
<8 x float> %64) #2
%66 = getelementptr <8 x float>, <8 x float>* %2, i32 %34
%67 = load <8 x float>, <8 x float>* %66
%68 = getelementptr <8 x float>, <8 x float>* %1, i32 %34
%69 = load <8 x float>, <8 x float>* %68
%70 = insertelement <8 x float> undef, float %4, i32 0
%71 = shufflevector <8 x float> %70, <8 x float> undef, <8 x i32>
zeroinitializer
%72 = fadd <8 x float> %67, %71
%73 = insertelement <8 x float> undef, float %5, i32 0
%74 = shufflevector <8 x float> %73, <8 x float> undef, <8 x i32>
zeroinitializer
%75 = fadd <8 x float> %69, %74
%76 = shufflevector <4 x float> %input0.x.a0aos, <4 x float> undef, <8 x i32>
zeroinitializer
%77 = shufflevector <4 x float> %input0.x.a0aos, <4 x float> undef, <8 x i32>
<i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
store i32 0, i32* %looplimiter
store i32 65535, i32* %looplimiter
store <8 x float> zeroinitializer, <8 x float>* %output
store <8 x float> zeroinitializer, <8 x float>* %output10
store <8 x float> zeroinitializer, <8 x float>* %output11
store <8 x float> zeroinitializer, <8 x float>* %output12
store <8 x float> zeroinitializer, <8 x float>* %output13
store <8 x float> zeroinitializer, <8 x float>* %output14
store <8 x float> zeroinitializer, <8 x float>* %output15
store <8 x float> zeroinitializer, <8 x float>* %output16
store <8 x float> %76, <8 x float>* %output13
store <8 x float> %77, <8 x float>* %output
%color0.r = load <8 x float>, <8 x float>* %output
%78 = getelementptr <8 x float>, <8 x float>* %color, i32 %34
store <8 x float> %color0.r, <8 x float>* %78
%color0.g = load <8 x float>, <8 x float>* %output10
%79 = getelementptr <8 x float>, <8 x float>* %color3, i32 %34
store <8 x float> %color0.g, <8 x float>* %79
%color0.b = load <8 x float>, <8 x float>* %output11
%80 = getelementptr <8 x float>, <8 x float>* %color4, i32 %34
store <8 x float> %color0.b, <8 x float>* %80
%color0.a = load <8 x float>, <8 x float>* %output12
%81 = getelementptr <8 x float>, <8 x float>* %color5, i32 %34
store <8 x float> %color0.a, <8 x float>* %81
%color1.r = load <8 x float>, <8 x float>* %output13
%82 = getelementptr <8 x float>, <8 x float>* %color6, i32 %34
store <8 x float> %color1.r, <8 x float>* %82
%color1.g = load <8 x float>, <8 x float>* %output14
%83 = getelementptr <8 x float>, <8 x float>* %color7, i32 %34
store <8 x float> %color1.g, <8 x float>* %83
%color1.b = load <8 x float>, <8 x float>* %output15
%84 = getelementptr <8 x float>, <8 x float>* %color8, i32 %34
store <8 x float> %color1.b, <8 x float>* %84
%color1.a = load <8 x float>, <8 x float>* %output16
%85 = getelementptr <8 x float>, <8 x float>* %color9, i32 %34
store <8 x float> %color1.a, <8 x float>* %85
br label %skip
skip: ; preds = %40, %loop_body
%86 = load <8 x i32>, <8 x i32>* %execution_mask
store <8 x i32> %86, <8 x i32>* %mask_ptr2
%87 = add i32 %34, 1
store i32 %87, i32* %loop_counter
br label %loop_begin
loop_exit: ; preds = %loop_begin
%88 = getelementptr <8 x i32>, <8 x i32>* %mask_store, i32 0
%mask = load <8 x i32>, <8 x i32>* %88
%89 = getelementptr <8 x float>, <8 x float>* %color, i32 0
%90 = getelementptr <8 x float>, <8 x float>* %color3, i32 0
%91 = getelementptr <8 x float>, <8 x float>* %color4, i32 0
%92 = getelementptr <8 x float>, <8 x float>* %color5, i32 0
%93 = getelementptr <8 x float>, <8 x float>* %color6, i32 0
%94 = getelementptr <8 x float>, <8 x float>* %color7, i32 0
%95 = getelementptr <8 x float>, <8 x float>* %color8, i32 0
%96 = getelementptr <8 x float>, <8 x float>* %color9, i32 0
%97 = getelementptr <8 x i32>, <8 x i32>* %mask_store, i32 1
%mask17 = load <8 x i32>, <8 x i32>* %97
%98 = getelementptr <8 x float>, <8 x float>* %color, i32 1
%99 = getelementptr <8 x float>, <8 x float>* %color3, i32 1
%100 = getelementptr <8 x float>, <8 x float>* %color4, i32 1
%101 = getelementptr <8 x float>, <8 x float>* %color5, i32 1
%102 = getelementptr <8 x float>, <8 x float>* %color6, i32 1
%103 = getelementptr <8 x float>, <8 x float>* %color7, i32 1
%104 = getelementptr <8 x float>, <8 x float>* %color8, i32 1
%105 = getelementptr <8 x float>, <8 x float>* %color9, i32 1
%106 = getelementptr <16 x i8>*, <16 x i8>** %color_ptr_ptr, i32 0
%color_ptr0 = load <16 x i8>*, <16 x i8>** %106
%107 = getelementptr i32, i32* %stride_ptr, i32 0
%108 = load i32, i32* %107
%109 = load <8 x float>, <8 x float>* %92
%110 = load <8 x float>, <8 x float>* %89
%111 = shufflevector <8 x i32> %mask, <8 x i32> %mask, <4 x i32> <i32 0, i32
1, i32 2, i32 3>
%112 = shufflevector <8 x i32> %mask, <8 x i32> %mask, <4 x i32> <i32 4, i32
5, i32 6, i32 7>
%113 = shufflevector <8 x float> %109, <8 x float> %109, <4 x i32> <i32 0,
i32 1, i32 2, i32 3>
%114 = shufflevector <8 x float> %109, <8 x float> %109, <4 x i32> <i32 4,
i32 5, i32 6, i32 7>
%115 = load <8 x float>, <8 x float>* %101
%116 = load <8 x float>, <8 x float>* %98
%117 = shufflevector <8 x i32> %mask17, <8 x i32> %mask17, <4 x i32> <i32 0,
i32 1, i32 2, i32 3>
%118 = shufflevector <8 x i32> %mask17, <8 x i32> %mask17, <4 x i32> <i32 4,
i32 5, i32 6, i32 7>
%119 = shufflevector <8 x float> %115, <8 x float> %115, <4 x i32> <i32 0,
i32 1, i32 2, i32 3>
%120 = shufflevector <8 x float> %115, <8 x float> %115, <4 x i32> <i32 4,
i32 5, i32 6, i32 7>
%121 = bitcast <8 x float> %110 to <8 x i32>
%122 = bitcast <8 x float> %116 to <8 x i32>
%123 = shufflevector <8 x i32> %122, <8 x i32> %122, <4 x i32> <i32 4, i32 5,
i32 6, i32 7>
%124 = shufflevector <8 x i32> %122, <8 x i32> %122, <4 x i32> <i32 0, i32 1,
i32 2, i32 3>
%125 = shufflevector <8 x i32> %121, <8 x i32> %121, <4 x i32> <i32 4, i32 5,
i32 6, i32 7>
%126 = shufflevector <8 x i32> %121, <8 x i32> %121, <4 x i32> <i32 0, i32 1,
i32 2, i32 3>
%127 = bitcast <4 x i32> %126 to <2 x i64>
%128 = bitcast <4 x i32> %125 to <2 x i64>
%129 = shufflevector <2 x i64> %127, <2 x i64> %128, <2 x i32> <i32 0, i32 2>
%130 = shufflevector <2 x i64> %127, <2 x i64> %128, <2 x i32> <i32 1, i32 3>
%131 = bitcast <2 x i64> %129 to <4 x i32>
%132 = bitcast <2 x i64> %130 to <4 x i32>
%133 = bitcast <4 x i32> %124 to <2 x i64>
%134 = bitcast <4 x i32> %123 to <2 x i64>
%135 = shufflevector <2 x i64> %133, <2 x i64> %134, <2 x i32> <i32 0, i32 2>
%136 = shufflevector <2 x i64> %133, <2 x i64> %134, <2 x i32> <i32 1, i32 3>
%137 = bitcast <2 x i64> %135 to <4 x i32>
%138 = bitcast <2 x i64> %136 to <4 x i32>
%context.f_blend_color_ptr = getelementptr { [16 x float*], [16 x i32], [128
x { i32, i32, i32, i8*, [14 x i32], [14 x i32], i32, i32, [14 x i32] }], [32 x
{ float, float, float, [4 x float] }], [32 x { i32, i32, i32, i8*, i32, i32 }],
float, i32, i32, i8*, float*, { float, float }*, [16 x i32*], [16 x i32] }, {
[16 x float*], [16 x i32], [128 x { i32, i32, i32, i8*, [14 x i32], [14 x i32],
i32, i32, [14 x i32] }], [32 x { float, float, float, [4 x float] }], [32 x {
i32, i32, i32, i8*, i32, i32 }], float, i32, i32, i8*, float*, { float, float
}*, [16 x i32*], [16 x i32] }* %context, i32 0, i32 9
%context.f_blend_color = load float*, float** %context.f_blend_color_ptr
%139 = bitcast float* %context.f_blend_color to <4 x i32>*
%140 = getelementptr <4 x i32>, <4 x i32>* %139, i32 0
%141 = load <4 x i32>, <4 x i32>* %140
%142 = shufflevector <4 x i32> %141, <4 x i32> undef, <4 x i32> <i32 3, i32
3, i32 3, i32 3>
%143 = shufflevector <4 x i32> %141, <4 x i32> undef, <4 x i32>
zeroinitializer
%144 = bitcast <4 x i32> %111 to <2 x i64>
%145 = bitcast <4 x i32> %112 to <2 x i64>
%146 = shufflevector <2 x i64> %144, <2 x i64> %145, <2 x i32> <i32 0, i32 2>
%147 = shufflevector <2 x i64> %144, <2 x i64> %145, <2 x i32> <i32 1, i32 3>
%148 = bitcast <2 x i64> %146 to <4 x i32>
%149 = bitcast <2 x i64> %147 to <4 x i32>
%150 = bitcast <4 x i32> %117 to <2 x i64>
%151 = bitcast <4 x i32> %118 to <2 x i64>
%152 = shufflevector <2 x i64> %150, <2 x i64> %151, <2 x i32> <i32 0, i32 2>
%153 = shufflevector <2 x i64> %150, <2 x i64> %151, <2 x i32> <i32 1, i32 3>
%154 = bitcast <2 x i64> %152 to <4 x i32>
%155 = bitcast <2 x i64> %153 to <4 x i32>
%156 = bitcast <4 x float> %113 to <2 x i64>
%157 = bitcast <4 x float> %114 to <2 x i64>
%158 = shufflevector <2 x i64> %156, <2 x i64> %157, <2 x i32> <i32 0, i32 2>
%159 = shufflevector <2 x i64> %156, <2 x i64> %157, <2 x i32> <i32 1, i32 3>
%160 = bitcast <2 x i64> %158 to <4 x i32>
%161 = bitcast <2 x i64> %159 to <4 x i32>
%162 = bitcast <4 x float> %119 to <2 x i64>
%163 = bitcast <4 x float> %120 to <2 x i64>
%164 = shufflevector <2 x i64> %162, <2 x i64> %163, <2 x i32> <i32 0, i32 2>
%165 = shufflevector <2 x i64> %162, <2 x i64> %163, <2 x i32> <i32 1, i32 3>
%166 = bitcast <2 x i64> %164 to <4 x i32>
%167 = bitcast <2 x i64> %165 to <4 x i32>
%168 = shufflevector <4 x i32> %160, <4 x i32> %160, <4 x i32> <i32 0, i32 1,
i32 2, i32 3>
%169 = shufflevector <4 x i32> %161, <4 x i32> %161, <4 x i32> <i32 0, i32 1,
i32 2, i32 3>
%170 = shufflevector <4 x i32> %166, <4 x i32> %166, <4 x i32> <i32 0, i32 1,
i32 2, i32 3>
%171 = shufflevector <4 x i32> %167, <4 x i32> %167, <4 x i32> <i32 0, i32 1,
i32 2, i32 3>
%172 = mul i32 0, %108
%173 = add i32 0, %172
%174 = getelementptr <16 x i8>, <16 x i8>* %color_ptr0, i32 0, i32 %173
%175 = bitcast i8* %174 to <4 x i32>*
%176 = load <4 x i32>, <4 x i32>* %175, align 16
%177 = mul i32 1, %108
%178 = add i32 0, %177
%179 = getelementptr <16 x i8>, <16 x i8>* %color_ptr0, i32 0, i32 %178
%180 = bitcast i8* %179 to <4 x i32>*
%181 = load <4 x i32>, <4 x i32>* %180, align 16
%182 = mul i32 2, %108
%183 = add i32 0, %182
%184 = getelementptr <16 x i8>, <16 x i8>* %color_ptr0, i32 0, i32 %183
%185 = bitcast i8* %184 to <4 x i32>*
%186 = load <4 x i32>, <4 x i32>* %185, align 16
%187 = mul i32 3, %108
%188 = add i32 0, %187
%189 = getelementptr <16 x i8>, <16 x i8>* %color_ptr0, i32 0, i32 %188
%190 = bitcast i8* %189 to <4 x i32>*
%191 = load <4 x i32>, <4 x i32>* %190, align 16
%192 = bitcast <4 x i32> %148 to <16 x i8>
%193 = bitcast <4 x i32> %131 to <16 x i8>
%194 = bitcast <4 x i32> %176 to <16 x i8>
%195 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %194, <16 x i8>
%193, <16 x i8> %192) #2
%196 = bitcast <16 x i8> %195 to <4 x i32>
%197 = bitcast <4 x i32> %149 to <16 x i8>
%198 = bitcast <4 x i32> %132 to <16 x i8>
%199 = bitcast <4 x i32> %181 to <16 x i8>
%200 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %199, <16 x i8>
%198, <16 x i8> %197) #2
%201 = bitcast <16 x i8> %200 to <4 x i32>
%202 = bitcast <4 x i32> %154 to <16 x i8>
%203 = bitcast <4 x i32> %137 to <16 x i8>
%204 = bitcast <4 x i32> %186 to <16 x i8>
%205 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %204, <16 x i8>
%203, <16 x i8> %202) #2
%206 = bitcast <16 x i8> %205 to <4 x i32>
%207 = bitcast <4 x i32> %155 to <16 x i8>
%208 = bitcast <4 x i32> %138 to <16 x i8>
%209 = bitcast <4 x i32> %191 to <16 x i8>
%210 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %209, <16 x i8>
%208, <16 x i8> %207) #2
%211 = bitcast <16 x i8> %210 to <4 x i32>
%212 = mul i32 0, %108
%213 = add i32 0, %212
%214 = getelementptr <16 x i8>, <16 x i8>* %color_ptr0, i32 0, i32 %213
%215 = bitcast i8* %214 to <4 x i32>*
store <4 x i32> %196, <4 x i32>* %215, align 16
%216 = mul i32 1, %108
%217 = add i32 0, %216
%218 = getelementptr <16 x i8>, <16 x i8>* %color_ptr0, i32 0, i32 %217
%219 = bitcast i8* %218 to <4 x i32>*
store <4 x i32> %201, <4 x i32>* %219, align 16
%220 = mul i32 2, %108
%221 = add i32 0, %220
%222 = getelementptr <16 x i8>, <16 x i8>* %color_ptr0, i32 0, i32 %221
%223 = bitcast i8* %222 to <4 x i32>*
store <4 x i32> %206, <4 x i32>* %223, align 16
%224 = mul i32 3, %108
%225 = add i32 0, %224
%226 = getelementptr <16 x i8>, <16 x i8>* %color_ptr0, i32 0, i32 %225
%227 = bitcast i8* %226 to <4 x i32>*
store <4 x i32> %211, <4 x i32>* %227, align 16
%228 = getelementptr <16 x i8>*, <16 x i8>** %color_ptr_ptr, i32 1
%color_ptr1 = load <16 x i8>*, <16 x i8>** %228
%229 = getelementptr i32, i32* %stride_ptr, i32 1
%230 = load i32, i32* %229
%231 = load <8 x float>, <8 x float>* %96
%232 = load <8 x float>, <8 x float>* %93
%233 = shufflevector <8 x i32> %mask, <8 x i32> %mask, <4 x i32> <i32 0, i32
1, i32 2, i32 3>
%234 = shufflevector <8 x i32> %mask, <8 x i32> %mask, <4 x i32> <i32 4, i32
5, i32 6, i32 7>
%235 = shufflevector <8 x float> %231, <8 x float> %231, <4 x i32> <i32 0,
i32 1, i32 2, i32 3>
%236 = shufflevector <8 x float> %231, <8 x float> %231, <4 x i32> <i32 4,
i32 5, i32 6, i32 7>
%237 = load <8 x float>, <8 x float>* %105
%238 = load <8 x float>, <8 x float>* %102
%239 = shufflevector <8 x i32> %mask17, <8 x i32> %mask17, <4 x i32> <i32 0,
i32 1, i32 2, i32 3>
%240 = shufflevector <8 x i32> %mask17, <8 x i32> %mask17, <4 x i32> <i32 4,
i32 5, i32 6, i32 7>
%241 = shufflevector <8 x float> %237, <8 x float> %237, <4 x i32> <i32 0,
i32 1, i32 2, i32 3>
%242 = shufflevector <8 x float> %237, <8 x float> %237, <4 x i32> <i32 4,
i32 5, i32 6, i32 7>
%243 = bitcast <8 x float> %232 to <8 x i32>
%244 = bitcast <8 x float> %238 to <8 x i32>
%245 = shufflevector <8 x i32> %244, <8 x i32> %244, <4 x i32> <i32 4, i32 5,
i32 6, i32 7>
%246 = shufflevector <8 x i32> %244, <8 x i32> %244, <4 x i32> <i32 0, i32 1,
i32 2, i32 3>
%247 = shufflevector <8 x i32> %243, <8 x i32> %243, <4 x i32> <i32 4, i32 5,
i32 6, i32 7>
%248 = shufflevector <8 x i32> %243, <8 x i32> %243, <4 x i32> <i32 0, i32 1,
i32 2, i32 3>
%249 = bitcast <4 x i32> %248 to <2 x i64>
%250 = bitcast <4 x i32> %247 to <2 x i64>
%251 = shufflevector <2 x i64> %249, <2 x i64> %250, <2 x i32> <i32 0, i32 2>
%252 = shufflevector <2 x i64> %249, <2 x i64> %250, <2 x i32> <i32 1, i32 3>
%253 = bitcast <2 x i64> %251 to <4 x i32>
%254 = bitcast <2 x i64> %252 to <4 x i32>
%255 = bitcast <4 x i32> %246 to <2 x i64>
%256 = bitcast <4 x i32> %245 to <2 x i64>
%257 = shufflevector <2 x i64> %255, <2 x i64> %256, <2 x i32> <i32 0, i32 2>
%258 = shufflevector <2 x i64> %255, <2 x i64> %256, <2 x i32> <i32 1, i32 3>
%259 = bitcast <2 x i64> %257 to <4 x i32>
%260 = bitcast <2 x i64> %258 to <4 x i32>
%context.f_blend_color_ptr18 = getelementptr { [16 x float*], [16 x i32],
[128 x { i32, i32, i32, i8*, [14 x i32], [14 x i32], i32, i32, [14 x i32] }],
[32 x { float, float, float, [4 x float] }], [32 x { i32, i32, i32, i8*, i32,
i32 }], float, i32, i32, i8*, float*, { float, float }*, [16 x i32*], [16 x
i32] }, { [16 x float*], [16 x i32], [128 x { i32, i32, i32, i8*, [14 x i32],
[14 x i32], i32, i32, [14 x i32] }], [32 x { float, float, float, [4 x float]
}], [32 x { i32, i32, i32, i8*, i32, i32 }], float, i32, i32, i8*, float*, {
float, float }*, [16 x i32*], [16 x i32] }* %context, i32 0, i32 9
%context.f_blend_color19 = load float*, float** %context.f_blend_color_ptr18
%261 = bitcast float* %context.f_blend_color19 to <4 x i32>*
%262 = getelementptr <4 x i32>, <4 x i32>* %261, i32 0
%263 = load <4 x i32>, <4 x i32>* %262
%264 = shufflevector <4 x i32> %263, <4 x i32> undef, <4 x i32> <i32 3, i32
3, i32 3, i32 3>
%265 = shufflevector <4 x i32> %263, <4 x i32> undef, <4 x i32>
zeroinitializer
%266 = bitcast <4 x i32> %233 to <2 x i64>
%267 = bitcast <4 x i32> %234 to <2 x i64>
%268 = shufflevector <2 x i64> %266, <2 x i64> %267, <2 x i32> <i32 0, i32 2>
%269 = shufflevector <2 x i64> %266, <2 x i64> %267, <2 x i32> <i32 1, i32 3>
%270 = bitcast <2 x i64> %268 to <4 x i32>
%271 = bitcast <2 x i64> %269 to <4 x i32>
%272 = bitcast <4 x i32> %239 to <2 x i64>
%273 = bitcast <4 x i32> %240 to <2 x i64>
%274 = shufflevector <2 x i64> %272, <2 x i64> %273, <2 x i32> <i32 0, i32 2>
%275 = shufflevector <2 x i64> %272, <2 x i64> %273, <2 x i32> <i32 1, i32 3>
%276 = bitcast <2 x i64> %274 to <4 x i32>
%277 = bitcast <2 x i64> %275 to <4 x i32>
%278 = bitcast <4 x float> %235 to <2 x i64>
%279 = bitcast <4 x float> %236 to <2 x i64>
%280 = shufflevector <2 x i64> %278, <2 x i64> %279, <2 x i32> <i32 0, i32 2>
%281 = shufflevector <2 x i64> %278, <2 x i64> %279, <2 x i32> <i32 1, i32 3>
%282 = bitcast <2 x i64> %280 to <4 x i32>
%283 = bitcast <2 x i64> %281 to <4 x i32>
%284 = bitcast <4 x float> %241 to <2 x i64>
%285 = bitcast <4 x float> %242 to <2 x i64>
%286 = shufflevector <2 x i64> %284, <2 x i64> %285, <2 x i32> <i32 0, i32 2>
%287 = shufflevector <2 x i64> %284, <2 x i64> %285, <2 x i32> <i32 1, i32 3>
%288 = bitcast <2 x i64> %286 to <4 x i32>
%289 = bitcast <2 x i64> %287 to <4 x i32>
%290 = shufflevector <4 x i32> %282, <4 x i32> %282, <4 x i32> <i32 0, i32 1,
i32 2, i32 3>
%291 = shufflevector <4 x i32> %283, <4 x i32> %283, <4 x i32> <i32 0, i32 1,
i32 2, i32 3>
%292 = shufflevector <4 x i32> %288, <4 x i32> %288, <4 x i32> <i32 0, i32 1,
i32 2, i32 3>
%293 = shufflevector <4 x i32> %289, <4 x i32> %289, <4 x i32> <i32 0, i32 1,
i32 2, i32 3>
%294 = mul i32 0, %230
%295 = add i32 0, %294
%296 = getelementptr <16 x i8>, <16 x i8>* %color_ptr1, i32 0, i32 %295
%297 = bitcast i8* %296 to <4 x i32>*
%298 = load <4 x i32>, <4 x i32>* %297, align 16
%299 = mul i32 1, %230
%300 = add i32 0, %299
%301 = getelementptr <16 x i8>, <16 x i8>* %color_ptr1, i32 0, i32 %300
%302 = bitcast i8* %301 to <4 x i32>*
%303 = load <4 x i32>, <4 x i32>* %302, align 16
%304 = mul i32 2, %230
%305 = add i32 0, %304
%306 = getelementptr <16 x i8>, <16 x i8>* %color_ptr1, i32 0, i32 %305
%307 = bitcast i8* %306 to <4 x i32>*
%308 = load <4 x i32>, <4 x i32>* %307, align 16
%309 = mul i32 3, %230
%310 = add i32 0, %309
%311 = getelementptr <16 x i8>, <16 x i8>* %color_ptr1, i32 0, i32 %310
%312 = bitcast i8* %311 to <4 x i32>*
%313 = load <4 x i32>, <4 x i32>* %312, align 16
%314 = bitcast <4 x i32> %270 to <16 x i8>
%315 = bitcast <4 x i32> %253 to <16 x i8>
%316 = bitcast <4 x i32> %298 to <16 x i8>
%317 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %316, <16 x i8>
%315, <16 x i8> %314) #2
%318 = bitcast <16 x i8> %317 to <4 x i32>
%319 = bitcast <4 x i32> %271 to <16 x i8>
%320 = bitcast <4 x i32> %254 to <16 x i8>
%321 = bitcast <4 x i32> %303 to <16 x i8>
%322 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %321, <16 x i8>
%320, <16 x i8> %319) #2
%323 = bitcast <16 x i8> %322 to <4 x i32>
%324 = bitcast <4 x i32> %276 to <16 x i8>
%325 = bitcast <4 x i32> %259 to <16 x i8>
%326 = bitcast <4 x i32> %308 to <16 x i8>
%327 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %326, <16 x i8>
%325, <16 x i8> %324) #2
%328 = bitcast <16 x i8> %327 to <4 x i32>
%329 = bitcast <4 x i32> %277 to <16 x i8>
%330 = bitcast <4 x i32> %260 to <16 x i8>
%331 = bitcast <4 x i32> %313 to <16 x i8>
%332 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %331, <16 x i8>
%330, <16 x i8> %329) #2
%333 = bitcast <16 x i8> %332 to <4 x i32>
%334 = mul i32 0, %230
%335 = add i32 0, %334
%336 = getelementptr <16 x i8>, <16 x i8>* %color_ptr1, i32 0, i32 %335
%337 = bitcast i8* %336 to <4 x i32>*
store <4 x i32> %318, <4 x i32>* %337, align 16
%338 = mul i32 1, %230
%339 = add i32 0, %338
%340 = getelementptr <16 x i8>, <16 x i8>* %color_ptr1, i32 0, i32 %339
%341 = bitcast i8* %340 to <4 x i32>*
store <4 x i32> %323, <4 x i32>* %341, align 16
%342 = mul i32 2, %230
%343 = add i32 0, %342
%344 = getelementptr <16 x i8>, <16 x i8>* %color_ptr1, i32 0, i32 %343
%345 = bitcast i8* %344 to <4 x i32>*
store <4 x i32> %328, <4 x i32>* %345, align 16
%346 = mul i32 3, %230
%347 = add i32 0, %346
%348 = getelementptr <16 x i8>, <16 x i8>* %color_ptr1, i32 0, i32 %347
%349 = bitcast i8* %348 to <4 x i32>*
store <4 x i32> %333, <4 x i32>* %349, align 16
ret void
}
llc -mattr option(s):
+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,+xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,+xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,+clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,+sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3
llc -mcpu option: skylake
define void @setup_variant_0(<4 x float>* noalias %in_v0, <4 x float>* noalias
%in_v1, <4 x float>* noalias %in_v2, i32 %in_facing, <4 x float>* noalias
%out_a0, <4 x float>* noalias %out_dadx, <4 x float>* noalias %out_dady) {
entry:
%0 = getelementptr <4 x float>, <4 x float>* %in_v0, i32 0
%v0a = load <4 x float>, <4 x float>* %0
%1 = getelementptr <4 x float>, <4 x float>* %in_v1, i32 0
%v1a = load <4 x float>, <4 x float>* %1
%2 = getelementptr <4 x float>, <4 x float>* %in_v2, i32 0
%v2a = load <4 x float>, <4 x float>* %2
%xy0_center = fsub <4 x float> %v0a, <float 5.000000e-01, float 5.000000e-01,
float 5.000000e-01, float 5.000000e-01>
%dxy01 = fsub <4 x float> %v0a, %v1a
%dxy20 = fsub <4 x float> %v2a, %v0a
%3 = shufflevector <4 x float> %dxy20, <4 x float> %dxy20, <4 x i32> <i32 1,
i32 0, i32 undef, i32 undef>
%ef = fmul <4 x float> %dxy01, %3
%4 = extractelement <4 x float> %ef, i32 0
%5 = extractelement <4 x float> %ef, i32 1
%6 = fsub float %4, %5
%ooa = fdiv float 1.000000e+00, %6
%7 = insertelement <4 x float> undef, float %ooa, i32 0
%8 = shufflevector <4 x float> %7, <4 x float> undef, <4 x i32>
zeroinitializer
%9 = fmul <4 x float> %dxy20, %8
%10 = fmul <4 x float> %dxy01, %8
%11 = shufflevector <4 x float> %9, <4 x float> undef, <4 x i32> <i32 1, i32
1, i32 1, i32 1>
%12 = shufflevector <4 x float> %10, <4 x float> undef, <4 x i32> <i32 1, i32
1, i32 1, i32 1>
%13 = shufflevector <4 x float> %9, <4 x float> undef, <4 x i32>
zeroinitializer
%14 = shufflevector <4 x float> %10, <4 x float> undef, <4 x i32>
zeroinitializer
%15 = shufflevector <4 x float> %xy0_center, <4 x float> undef, <4 x i32>
zeroinitializer
%16 = shufflevector <4 x float> %xy0_center, <4 x float> undef, <4 x i32>
<i32 1, i32 1, i32 1, i32 1>
%da01 = fsub <4 x float> %v0a, %v1a
%da20 = fsub <4 x float> %v2a, %v0a
%da01_dy20_ooa = fmul <4 x float> %da01, %11
%da20_dy01_ooa = fmul <4 x float> %da20, %12
%dadx = fsub <4 x float> %da01_dy20_ooa, %da20_dy01_ooa
%da01_dx20_ooa = fmul <4 x float> %da01, %13
%da20_dx01_ooa = fmul <4 x float> %da20, %14
%dady = fsub <4 x float> %da20_dx01_ooa, %da01_dx20_ooa
%dadx_x0 = fmul <4 x float> %dadx, %15
%dady_y0 = fmul <4 x float> %dady, %16
%attr_v0 = fadd <4 x float> %dadx_x0, %dady_y0
%attr_0 = fsub <4 x float> %v0a, %attr_v0
%17 = getelementptr <4 x float>, <4 x float>* %out_a0, i32 0
store <4 x float> %attr_0, <4 x float>* %17
%18 = getelementptr <4 x float>, <4 x float>* %out_dadx, i32 0
store <4 x float> %dadx, <4 x float>* %18
%19 = getelementptr <4 x float>, <4 x float>* %out_dady, i32 0
store <4 x float> %dady, <4 x float>* %19
%20 = getelementptr <4 x float>, <4 x float>* %in_v0, i32 1
%v0a1 = load <4 x float>, <4 x float>* %20
%21 = getelementptr <4 x float>, <4 x float>* %in_v1, i32 1
%v1a2 = load <4 x float>, <4 x float>* %21
%22 = getelementptr <4 x float>, <4 x float>* %in_v2, i32 1
%v2a3 = load <4 x float>, <4 x float>* %22
%23 = getelementptr <4 x float>, <4 x float>* %out_a0, i32 1
store <4 x float> %v2a3, <4 x float>* %23
%24 = getelementptr <4 x float>, <4 x float>* %out_dadx, i32 1
store <4 x float> zeroinitializer, <4 x float>* %24
%25 = getelementptr <4 x float>, <4 x float>* %out_dady, i32 1
store <4 x float> zeroinitializer, <4 x float>* %25
ret void
}
llc -mattr option(s):
+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,+xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,+xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,+clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,+sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3
llc -mcpu option: skylake
VERT
PROPERTY NEXT_SHADER FRAG
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1].x, PSIZE
DCL OUT[2].xy, GENERIC[9]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: UMUL TEMP[0].x, IN[1].xxxx, IN[2].xxxx
1: UMUL_HI TEMP[1].x, IN[1].xxxx, IN[2].xxxx
2: MOV OUT[0], IN[0]
3: MOV OUT[2].y, TEMP[1].xxxx
4: MOV OUT[1].x, IMM[0].xxxx
5: MOV OUT[2].x, TEMP[0].xxxx
6: END
clamp_vertex_color = 0
clip_xy = 1
clip_z = 1
clip_user = 0
bypass_viewport = 0
clip_halfz = 0
need_edgeflags = 0
has_gs = 0
ucp_enable = 0
vertex_element[0].src_offset = 0
vertex_element[0].instance_divisor = 0
vertex_element[0].vertex_buffer_index = 0
vertex_element[0].src_format = PIPE_FORMAT_R32G32_FLOAT
vertex_element[1].src_offset = 0
vertex_element[1].instance_divisor = 0
vertex_element[1].vertex_buffer_index = 1
vertex_element[1].src_format = PIPE_FORMAT_R32_UINT
vertex_element[2].src_offset = 400
vertex_element[2].instance_divisor = 0
vertex_element[2].vertex_buffer_index = 1
vertex_element[2].src_format = PIPE_FORMAT_R32_UINT
; Function Attrs: nounwind readnone speculatable
declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #0
; Function Attrs: nounwind readonly
declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x
i32>, i8) #1
llvm (version 0x800) found no intrinsic for llvm.x86.avx2.pmulu.dq, going to
crash...
--
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190827/c75a3d16/attachment-0001.html>
More information about the mesa-dev
mailing list