[Mesa-dev] [Bug 72615] New: [llvmpipe] piglit copyteximage 1D regression
bugzilla-daemon at freedesktop.org
bugzilla-daemon at freedesktop.org
Wed Dec 11 14:10:28 PST 2013
https://bugs.freedesktop.org/show_bug.cgi?id=72615
Priority: medium
Bug ID: 72615
Keywords: regression
CC: jfonseca at vmware.com, sroland at vmware.com, zackr at vmware.com
Assignee: mesa-dev at lists.freedesktop.org
Summary: [llvmpipe] piglit copyteximage 1D regression
Severity: normal
Classification: Unclassified
OS: Linux (All)
Reporter: vlee at freedesktop.org
Hardware: x86-64 (AMD64)
Status: NEW
Version: git
Component: Other
Product: Mesa
mesa: e84a1ab3c400f819408a7ebe01c2325cd59d94d3 (master)
$ ./bin/copyteximage 1D -auto
Testing GL_TEXTURE_1D
Texture target = GL_TEXTURE_1D, Internal format = GL_RED
Illegal sampler view creation without bind flag
Texture target = GL_TEXTURE_1D, Internal format = GL_RG
Illegal sampler view creation without bind flag
Texture target = GL_TEXTURE_1D, Internal format = GL_RGB8
Illegal sampler view creation without bind flag
Texture target = GL_TEXTURE_1D, Internal format = GL_RGB16
Illegal sampler view creation without bind flag
Texture target = GL_TEXTURE_1D, Internal format = GL_RGB16F
Illegal sampler view creation without bind flag
Intrinsic has incorrect argument type!
void (i32*)* @llvm.x86.sse.stmxcsr
Intrinsic has incorrect argument type!
void (i32*)* @llvm.x86.sse.stmxcsr
Intrinsic has incorrect argument type!
void (i32*)* @llvm.x86.sse.ldmxcsr
Intrinsic has incorrect argument type!
void (i32*)* @llvm.x86.sse.ldmxcsr
Broken module found, verification continues.
define void @fs5_variant4_partial({ [16 x float*], float, i32, i32, i8*,
float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32],
[14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }] }*
noalias %context, i32 %x, i32 %y, i32, float* noalias %a0, float* noalias
%dadx, float* noalias %dady, <16 x i8>** noalias %color_ptr_ptr, i8* noalias
%depth, i32 %mask_input, { i64, i32 }* noalias %thread_data, i32* noalias
%stride_ptr, i32 %depth_stride) {
entry:
%mxcsr_ptr10 = alloca i32
%mxcsr_ptr = alloca i32
%packed_var = alloca <16 x i8>
%output8 = alloca <4 x float>
%output7 = alloca <4 x float>
%output6 = alloca <4 x float>
%output = alloca <4 x float>
%looplimiter = alloca i32
%execution_mask = alloca <4 x i32>
%color5 = alloca <4 x float>, i32 2
%color4 = alloca <4 x float>, i32 2
%color3 = alloca <4 x float>, i32 2
%color = alloca <4 x float>, i32 2
%loop_counter = alloca i32
%1 = alloca <4 x float>
%2 = alloca <4 x float>
%3 = alloca <4 x float>
%4 = alloca <4 x float>
%5 = alloca <4 x float>
%6 = alloca <4 x float>
%mask_store = alloca <4 x i32>, i32 2
%7 = sitofp i32 %x to float
%8 = sitofp i32 %y to float
%9 = getelementptr float* %dadx, i32 0
%10 = bitcast float* %9 to <4 x float>*
%pos.x.dadxaos = load <4 x float>* %10
%11 = getelementptr float* %dady, i32 0
%12 = bitcast float* %11 to <4 x float>*
%pos.x.dadyaos = load <4 x float>* %12
%13 = getelementptr float* %a0, i32 0
%14 = bitcast float* %13 to <4 x float>*
%pos.x.a0aos = load <4 x float>* %14
%15 = insertelement <4 x float> undef, float %7, i32 0
%16 = shufflevector <4 x float> %15, <4 x float> undef, <4 x i32>
zeroinitializer
%17 = fmul <4 x float> %16, %pos.x.dadxaos
%18 = insertelement <4 x float> undef, float %8, i32 0
%19 = shufflevector <4 x float> %18, <4 x float> undef, <4 x i32>
zeroinitializer
%20 = fmul <4 x float> %19, %pos.x.dadyaos
%21 = fadd <4 x float> %pos.x.a0aos, %20
%22 = fadd <4 x float> %21, %17
%23 = fadd float %7, 5.000000e-01
%24 = insertelement <4 x float> undef, float %23, i32 0
%25 = shufflevector <4 x float> %24, <4 x float> undef, <4 x i32>
zeroinitializer
%pos.x.a = fadd <4 x float> %25, <float 0.000000e+00, float 2.000000e+00,
float 0.000000e+00, float 2.000000e+00>
store <4 x float> zeroinitializer, <4 x float>* %6
store <4 x float> %pos.x.a, <4 x float>* %6
%26 = fadd float %8, 5.000000e-01
%27 = insertelement <4 x float> undef, float %26, i32 0
%28 = shufflevector <4 x float> %27, <4 x float> undef, <4 x i32>
zeroinitializer
%pos.y.a = fadd <4 x float> %28, <float 0.000000e+00, float 0.000000e+00,
float 2.000000e+00, float 2.000000e+00>
store <4 x float> zeroinitializer, <4 x float>* %5
store <4 x float> %pos.y.a, <4 x float>* %5
%29 = shufflevector <4 x float> %pos.x.dadxaos, <4 x float> undef, <4 x i32>
<i32 2, i32 2, i32 2, i32 2>
%30 = shufflevector <4 x float> %pos.x.dadyaos, <4 x float> undef, <4 x i32>
<i32 2, i32 2, i32 2, i32 2>
%31 = shufflevector <4 x float> %22, <4 x float> undef, <4 x i32> <i32 2, i32
2, i32 2, i32 2>
%32 = fmul <4 x float> %29, <float 0.000000e+00, float 1.000000e+00, float
0.000000e+00, float 1.000000e+00>
%33 = fmul <4 x float> %30, <float 0.000000e+00, float 0.000000e+00, float
1.000000e+00, float 1.000000e+00>
%pos.z.dadq = fadd <4 x float> %32, %33
%34 = fadd <4 x float> %pos.z.dadq, %pos.z.dadq
%pos.z.a = fadd <4 x float> %31, %34
store <4 x float> zeroinitializer, <4 x float>* %4
store <4 x float> %pos.z.a, <4 x float>* %4
%35 = shufflevector <4 x float> %pos.x.dadxaos, <4 x float> undef, <4 x i32>
<i32 3, i32 3, i32 3, i32 3>
%36 = shufflevector <4 x float> %pos.x.dadyaos, <4 x float> undef, <4 x i32>
<i32 3, i32 3, i32 3, i32 3>
%37 = shufflevector <4 x float> %22, <4 x float> undef, <4 x i32> <i32 3, i32
3, i32 3, i32 3>
%38 = fmul <4 x float> %35, <float 0.000000e+00, float 1.000000e+00, float
0.000000e+00, float 1.000000e+00>
%39 = fmul <4 x float> %36, <float 0.000000e+00, float 0.000000e+00, float
1.000000e+00, float 1.000000e+00>
%pos.w.dadq = fadd <4 x float> %38, %39
%40 = fadd <4 x float> %pos.w.dadq, %pos.w.dadq
%pos.w.a = fadd <4 x float> %37, %40
store <4 x float> zeroinitializer, <4 x float>* %3
store <4 x float> %pos.w.a, <4 x float>* %3
%41 = getelementptr float* %dadx, i32 4
%42 = bitcast float* %41 to <4 x float>*
%input0.x.dadxaos = load <4 x float>* %42
%43 = getelementptr float* %dady, i32 4
%44 = bitcast float* %43 to <4 x float>*
%input0.x.dadyaos = load <4 x float>* %44
%45 = getelementptr float* %a0, i32 4
%46 = bitcast float* %45 to <4 x float>*
%input0.x.a0aos = load <4 x float>* %46
%47 = insertelement <4 x float> undef, float %7, i32 0
%48 = shufflevector <4 x float> %47, <4 x float> undef, <4 x i32>
zeroinitializer
%49 = fmul <4 x float> %48, %input0.x.dadxaos
%50 = insertelement <4 x float> undef, float %8, i32 0
%51 = shufflevector <4 x float> %50, <4 x float> undef, <4 x i32>
zeroinitializer
%52 = fmul <4 x float> %51, %input0.x.dadyaos
%53 = fadd <4 x float> %input0.x.a0aos, %52
%54 = fadd <4 x float> %53, %49
%55 = shufflevector <4 x float> %input0.x.dadxaos, <4 x float> undef, <4 x
i32> zeroinitializer
%56 = shufflevector <4 x float> %input0.x.dadyaos, <4 x float> undef, <4 x
i32> zeroinitializer
%57 = shufflevector <4 x float> %54, <4 x float> undef, <4 x i32>
zeroinitializer
%58 = fmul <4 x float> %55, <float 0.000000e+00, float 1.000000e+00, float
0.000000e+00, float 1.000000e+00>
%59 = fmul <4 x float> %56, <float 0.000000e+00, float 0.000000e+00, float
1.000000e+00, float 1.000000e+00>
%input0.x.dadq = fadd <4 x float> %58, %59
%60 = fadd <4 x float> %input0.x.dadq, %input0.x.dadq
%input0.x.a = fadd <4 x float> %57, %60
store <4 x float> zeroinitializer, <4 x float>* %2
store <4 x float> %input0.x.a, <4 x float>* %2
%61 = shufflevector <4 x float> %input0.x.dadxaos, <4 x float> undef, <4 x
i32> <i32 1, i32 1, i32 1, i32 1>
%62 = shufflevector <4 x float> %input0.x.dadyaos, <4 x float> undef, <4 x
i32> <i32 1, i32 1, i32 1, i32 1>
%63 = shufflevector <4 x float> %54, <4 x float> undef, <4 x i32> <i32 1, i32
1, i32 1, i32 1>
%64 = fmul <4 x float> %61, <float 0.000000e+00, float 1.000000e+00, float
0.000000e+00, float 1.000000e+00>
%65 = fmul <4 x float> %62, <float 0.000000e+00, float 0.000000e+00, float
1.000000e+00, float 1.000000e+00>
%input0.y.dadq = fadd <4 x float> %64, %65
%66 = fadd <4 x float> %input0.y.dadq, %input0.y.dadq
%input0.y.a = fadd <4 x float> %63, %66
store <4 x float> zeroinitializer, <4 x float>* %1
store <4 x float> %input0.y.a, <4 x float>* %1
%mask_ptr = getelementptr <4 x i32>* %mask_store, i32 0
%67 = lshr i32 %mask_input, 0
%68 = insertelement <4 x i32> undef, i32 %67, i32 0
%69 = shufflevector <4 x i32> %68, <4 x i32> undef, <4 x i32> zeroinitializer
%70 = and <4 x i32> %69, <i32 1, i32 2, i32 16, i32 32>
%71 = icmp ne <4 x i32> %70, zeroinitializer
%72 = sext <4 x i1> %71 to <4 x i32>
store <4 x i32> %72, <4 x i32>* %mask_ptr
%mask_ptr1 = getelementptr <4 x i32>* %mask_store, i32 1
%73 = lshr i32 %mask_input, 2
%74 = insertelement <4 x i32> undef, i32 %73, i32 0
%75 = shufflevector <4 x i32> %74, <4 x i32> undef, <4 x i32> zeroinitializer
%76 = and <4 x i32> %75, <i32 1, i32 2, i32 16, i32 32>
%77 = icmp ne <4 x i32> %76, zeroinitializer
%78 = sext <4 x i1> %77 to <4 x i32>
store <4 x i32> %78, <4 x i32>* %mask_ptr1
%context.stencil_ref_front_ptr = getelementptr { [16 x float*], float, i32,
i32, i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14
x i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }]
}* %context, i32 0, i32 2
%context.stencil_ref_front = load i32* %context.stencil_ref_front_ptr
%context.stencil_ref_back_ptr = getelementptr { [16 x float*], float, i32,
i32, i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14
x i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }]
}* %context, i32 0, i32 3
%context.stencil_ref_back = load i32* %context.stencil_ref_back_ptr
%context.constants_ptr = getelementptr { [16 x float*], float, i32, i32, i8*,
float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32],
[14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }] }*
%context, i32 0, i32 0
store i32 0, i32* %loop_counter
store i32 0, i32* %loop_counter
br label %loop_begin
loop_begin: ; preds = %skip, %entry
%79 = load i32* %loop_counter
%80 = icmp ult i32 %79, 2
br i1 %80, label %loop_body, label %loop_exit
loop_body: ; preds = %loop_begin
%mask_ptr2 = getelementptr <4 x i32>* %mask_store, i32 %79
%81 = load <4 x i32>* %mask_ptr2
store <4 x i32> zeroinitializer, <4 x i32>* %execution_mask
store <4 x i32> %81, <4 x i32>* %execution_mask
%82 = load <4 x i32>* %execution_mask
%83 = bitcast <4 x i32> %82 to i128
%84 = icmp eq i128 %83, 0
br i1 %84, label %skip, label %85
; <label>:85 ; preds = %loop_body
%86 = bitcast <4 x float>* %6 to float*
%87 = getelementptr float* %86, i32 %79
%88 = load float* %87
%89 = insertelement <4 x float> undef, float %88, i32 0
%90 = shufflevector <4 x float> %89, <4 x float> undef, <4 x i32>
zeroinitializer
%pos.x = fadd <4 x float> %90, <float 0.000000e+00, float 1.000000e+00, float
0.000000e+00, float 1.000000e+00>
%91 = bitcast <4 x float>* %5 to float*
%92 = getelementptr float* %91, i32 %79
%93 = load float* %92
%94 = insertelement <4 x float> undef, float %93, i32 0
%95 = shufflevector <4 x float> %94, <4 x float> undef, <4 x i32>
zeroinitializer
%pos.y = fadd <4 x float> %95, <float 0.000000e+00, float 0.000000e+00, float
1.000000e+00, float 1.000000e+00>
%96 = bitcast <4 x float>* %4 to float*
%97 = getelementptr float* %96, i32 %79
%98 = load float* %97
%99 = insertelement <4 x float> undef, float %98, i32 0
%100 = shufflevector <4 x float> %99, <4 x float> undef, <4 x i32>
zeroinitializer
%101 = fadd <4 x float> %100, %pos.z.dadq
%pos.z = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %101, <4 x float>
<float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float
1.000000e+00>)
%102 = bitcast <4 x float>* %3 to float*
%103 = getelementptr float* %102, i32 %79
%104 = load float* %103
%105 = insertelement <4 x float> undef, float %104, i32 0
%106 = shufflevector <4 x float> %105, <4 x float> undef, <4 x i32>
zeroinitializer
%pos.w = fadd <4 x float> %106, %pos.w.dadq
%107 = bitcast <4 x float>* %2 to float*
%108 = getelementptr float* %107, i32 %79
%109 = load float* %108
%110 = insertelement <4 x float> undef, float %109, i32 0
%111 = shufflevector <4 x float> %110, <4 x float> undef, <4 x i32>
zeroinitializer
%input0.x = fadd <4 x float> %111, %input0.x.dadq
%112 = bitcast <4 x float>* %1 to float*
%113 = getelementptr float* %112, i32 %79
%114 = load float* %113
%115 = insertelement <4 x float> undef, float %114, i32 0
%116 = shufflevector <4 x float> %115, <4 x float> undef, <4 x i32>
zeroinitializer
%input0.y = fadd <4 x float> %116, %input0.y.dadq
store i32 0, i32* %looplimiter
store i32 65535, i32* %looplimiter
store <4 x float> zeroinitializer, <4 x float>* %output
store <4 x float> zeroinitializer, <4 x float>* %output6
store <4 x float> zeroinitializer, <4 x float>* %output7
store <4 x float> zeroinitializer, <4 x float>* %output8
%117 = getelementptr { [16 x float*], float, i32, i32, i8*, float*, { float,
float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32], [14 x i32], [14 x
i32] }], [16 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 7,
i32 0, i32 0
%context.texture0.width = load i32* %117
%context.texture0.row_stride = getelementptr { [16 x float*], float, i32,
i32, i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14
x i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }]
}* %context, i32 0, i32 7, i32 0, i32 6
%context.texture0.img_stride = getelementptr { [16 x float*], float, i32,
i32, i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14
x i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }]
}* %context, i32 0, i32 7, i32 0, i32 7
%118 = getelementptr { [16 x float*], float, i32, i32, i8*, float*, { float,
float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32], [14 x i32], [14 x
i32] }], [16 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 7,
i32 0, i32 5
%context.texture0.base_ptr = load i8** %118
%context.texture0.mip_offsets = getelementptr { [16 x float*], float, i32,
i32, i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14
x i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }]
}* %context, i32 0, i32 7, i32 0, i32 8
%119 = insertelement <4 x i32> undef, i32 %context.texture0.width, i32 0
%120 = getelementptr { [16 x float*], float, i32, i32, i8*, float*, { float,
float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32], [14 x i32], [14 x
i32] }], [16 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 7,
i32 0, i32 1
%context.texture0.height = load i32* %120
%121 = insertelement <4 x i32> %119, i32 %context.texture0.height, i32 1
%122 = getelementptr { [16 x float*], float, i32, i32, i8*, float*, { float,
float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32], [14 x i32], [14 x
i32] }], [16 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 7,
i32 0, i32 3
%context.texture0.first_level = load i32* %122
store <16 x i8> zeroinitializer, <16 x i8>* %packed_var
%123 = insertelement <4 x i32> undef, i32 %context.texture0.first_level, i32
0
%124 = shufflevector <4 x i32> %123, <4 x i32> undef, <4 x i32>
zeroinitializer
%minify = lshr <4 x i32> %121, %124
%125 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %minify, <4 x i32>
<i32 1, i32 1, i32 1, i32 1>)
%126 = getelementptr [14 x i32]* %context.texture0.row_stride, i32 0, i32
%context.texture0.first_level
%127 = load i32* %126
%128 = insertelement <4 x i32> undef, i32 %127, i32 0
%129 = shufflevector <4 x i32> %128, <4 x i32> undef, <4 x i32>
zeroinitializer
%130 = getelementptr [14 x i32]* %context.texture0.mip_offsets, i32 0, i32
%context.texture0.first_level
%131 = load i32* %130
%132 = getelementptr i8* %context.texture0.base_ptr, i32 %131
%133 = shufflevector <4 x i32> %125, <4 x i32> undef, <4 x i32>
zeroinitializer
%134 = shufflevector <4 x i32> %125, <4 x i32> undef, <4 x i32> <i32 1, i32
1, i32 1, i32 1>
%135 = shl <4 x i32> %125, <i32 8, i32 8, i32 8, i32 8>
%136 = sitofp <4 x i32> %135 to <4 x float>
%137 = shufflevector <4 x float> %136, <4 x float> undef, <4 x i32>
zeroinitializer
%138 = shufflevector <4 x float> %136, <4 x float> undef, <4 x i32> <i32 1,
i32 1, i32 1, i32 1>
%139 = fmul <4 x float> %input0.x, %137
%140 = fmul <4 x float> %input0.y, %138
%141 = fptosi <4 x float> %139 to <4 x i32>
%142 = fptosi <4 x float> %140 to <4 x i32>
%143 = ashr <4 x i32> %141, <i32 8, i32 8, i32 8, i32 8>
%144 = ashr <4 x i32> %142, <i32 8, i32 8, i32 8, i32 8>
%145 = sub <4 x i32> %133, <i32 1, i32 1, i32 1, i32 1>
%146 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %143, <4 x i32>
zeroinitializer)
%147 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %146, <4 x i32> %145)
%148 = mul <4 x i32> %147, <i32 4, i32 4, i32 4, i32 4>
%149 = sub <4 x i32> %134, <i32 1, i32 1, i32 1, i32 1>
%150 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %144, <4 x i32>
zeroinitializer)
%151 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %150, <4 x i32> %149)
%152 = mul <4 x i32> %151, %129
%153 = add <4 x i32> %148, %152
%154 = extractelement <4 x i32> %153, i32 0
%155 = getelementptr i8* %132, i32 %154
%156 = bitcast i8* %155 to i32*
%157 = load i32* %156
%158 = insertelement <4 x i32> undef, i32 %157, i32 0
%159 = extractelement <4 x i32> %153, i32 1
%160 = getelementptr i8* %132, i32 %159
%161 = bitcast i8* %160 to i32*
%162 = load i32* %161
%163 = insertelement <4 x i32> %158, i32 %162, i32 1
%164 = extractelement <4 x i32> %153, i32 2
%165 = getelementptr i8* %132, i32 %164
%166 = bitcast i8* %165 to i32*
%167 = load i32* %166
%168 = insertelement <4 x i32> %163, i32 %167, i32 2
%169 = extractelement <4 x i32> %153, i32 3
%170 = getelementptr i8* %132, i32 %169
%171 = bitcast i8* %170 to i32*
%172 = load i32* %171
%173 = insertelement <4 x i32> %168, i32 %172, i32 3
%174 = bitcast <4 x i32> %173 to <16 x i8>
store <16 x i8> %174, <16 x i8>* %packed_var
%175 = load <16 x i8>* %packed_var
%176 = bitcast <16 x i8> %175 to <4 x i32>
%177 = and <4 x i32> %176, <i32 255, i32 255, i32 255, i32 255>
%178 = sitofp <4 x i32> %177 to <4 x float>
%179 = fmul <4 x float> %178, <float 0x3F70101020000000, float
0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
%180 = lshr <4 x i32> %176, <i32 8, i32 8, i32 8, i32 8>
%181 = and <4 x i32> %180, <i32 255, i32 255, i32 255, i32 255>
%182 = sitofp <4 x i32> %181 to <4 x float>
%183 = fmul <4 x float> %182, <float 0x3F70101020000000, float
0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
%184 = lshr <4 x i32> %176, <i32 16, i32 16, i32 16, i32 16>
%185 = and <4 x i32> %184, <i32 255, i32 255, i32 255, i32 255>
%186 = sitofp <4 x i32> %185 to <4 x float>
%187 = fmul <4 x float> %186, <float 0x3F70101020000000, float
0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
%188 = lshr <4 x i32> %176, <i32 24, i32 24, i32 24, i32 24>
%189 = sitofp <4 x i32> %188 to <4 x float>
%190 = fmul <4 x float> %189, <float 0x3F70101020000000, float
0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
store <4 x float> %187, <4 x float>* %output
store <4 x float> %183, <4 x float>* %output6
store <4 x float> %179, <4 x float>* %output7
store <4 x float> %190, <4 x float>* %output8
%color0.r = load <4 x float>* %output
%191 = getelementptr <4 x float>* %color, i32 %79
store <4 x float> %color0.r, <4 x float>* %191
%color0.g = load <4 x float>* %output6
%192 = getelementptr <4 x float>* %color3, i32 %79
store <4 x float> %color0.g, <4 x float>* %192
%color0.b = load <4 x float>* %output7
%193 = getelementptr <4 x float>* %color4, i32 %79
store <4 x float> %color0.b, <4 x float>* %193
%color0.a = load <4 x float>* %output8
%194 = getelementptr <4 x float>* %color5, i32 %79
store <4 x float> %color0.a, <4 x float>* %194
br label %skip
skip: ; preds = %85, %loop_body
%195 = load <4 x i32>* %execution_mask
store <4 x i32> %195, <4 x i32>* %mask_ptr2
%196 = add i32 %79, 1
store i32 %196, i32* %loop_counter
br label %loop_begin
loop_exit: ; preds = %loop_begin
%197 = getelementptr <4 x i32>* %mask_store, i32 0
%mask = load <4 x i32>* %197
%198 = getelementptr <4 x float>* %color, i32 0
%199 = getelementptr <4 x float>* %color3, i32 0
%200 = getelementptr <4 x float>* %color4, i32 0
%201 = getelementptr <4 x float>* %color5, i32 0
%202 = getelementptr <4 x i32>* %mask_store, i32 1
%mask9 = load <4 x i32>* %202
%203 = getelementptr <4 x float>* %color, i32 1
%204 = getelementptr <4 x float>* %color3, i32 1
%205 = getelementptr <4 x float>* %color4, i32 1
%206 = getelementptr <4 x float>* %color5, i32 1
%207 = getelementptr <16 x i8>** %color_ptr_ptr, i32 0
%color_ptr0 = load <16 x i8>** %207
%208 = getelementptr i32* %stride_ptr, i32 0
%209 = load i32* %208
store i32 0, i32* %mxcsr_ptr
call void @llvm.x86.sse.stmxcsr(i32* %mxcsr_ptr)
store i32 0, i32* %mxcsr_ptr10
call void @llvm.x86.sse.stmxcsr(i32* %mxcsr_ptr10)
%mxcsr = load i32* %mxcsr_ptr10
%210 = and i32 %mxcsr, -32833
store i32 %210, i32* %mxcsr_ptr10
call void @llvm.x86.sse.ldmxcsr(i32* %mxcsr_ptr10)
%211 = load <4 x float>* %201
%212 = load <4 x float>* %198
%213 = load <4 x float>* %199
%214 = load <4 x float>* %200
%215 = load <4 x float>* %206
%216 = load <4 x float>* %203
%217 = load <4 x float>* %204
%218 = load <4 x float>* %205
%219 = shufflevector <4 x float> %212, <4 x float> %213, <4 x i32> <i32 0,
i32 4, i32 1, i32 5>
%220 = shufflevector <4 x float> %214, <4 x float> %211, <4 x i32> <i32 0,
i32 4, i32 1, i32 5>
%221 = shufflevector <4 x float> %212, <4 x float> %213, <4 x i32> <i32 2,
i32 6, i32 3, i32 7>
%222 = shufflevector <4 x float> %214, <4 x float> %211, <4 x i32> <i32 2,
i32 6, i32 3, i32 7>
%t0 = bitcast <4 x float> %219 to <2 x double>
%t1 = bitcast <4 x float> %220 to <2 x double>
%t2 = bitcast <4 x float> %221 to <2 x double>
%t3 = bitcast <4 x float> %222 to <2 x double>
%223 = shufflevector <2 x double> %t0, <2 x double> %t1, <2 x i32> <i32 0,
i32 2>
%224 = shufflevector <2 x double> %t0, <2 x double> %t1, <2 x i32> <i32 1,
i32 3>
%225 = shufflevector <2 x double> %t2, <2 x double> %t3, <2 x i32> <i32 0,
i32 2>
%226 = shufflevector <2 x double> %t2, <2 x double> %t3, <2 x i32> <i32 1,
i32 3>
%dst0 = bitcast <2 x double> %223 to <4 x float>
%dst1 = bitcast <2 x double> %224 to <4 x float>
%dst2 = bitcast <2 x double> %225 to <4 x float>
%dst3 = bitcast <2 x double> %226 to <4 x float>
%227 = shufflevector <4 x float> %216, <4 x float> %217, <4 x i32> <i32 0,
i32 4, i32 1, i32 5>
%228 = shufflevector <4 x float> %218, <4 x float> %215, <4 x i32> <i32 0,
i32 4, i32 1, i32 5>
%229 = shufflevector <4 x float> %216, <4 x float> %217, <4 x i32> <i32 2,
i32 6, i32 3, i32 7>
%230 = shufflevector <4 x float> %218, <4 x float> %215, <4 x i32> <i32 2,
i32 6, i32 3, i32 7>
%t011 = bitcast <4 x float> %227 to <2 x double>
%t112 = bitcast <4 x float> %228 to <2 x double>
%t213 = bitcast <4 x float> %229 to <2 x double>
%t314 = bitcast <4 x float> %230 to <2 x double>
%231 = shufflevector <2 x double> %t011, <2 x double> %t112, <2 x i32> <i32
0, i32 2>
%232 = shufflevector <2 x double> %t011, <2 x double> %t112, <2 x i32> <i32
1, i32 3>
%233 = shufflevector <2 x double> %t213, <2 x double> %t314, <2 x i32> <i32
0, i32 2>
%234 = shufflevector <2 x double> %t213, <2 x double> %t314, <2 x i32> <i32
1, i32 3>
%dst015 = bitcast <2 x double> %231 to <4 x float>
%dst116 = bitcast <2 x double> %232 to <4 x float>
%dst217 = bitcast <2 x double> %233 to <4 x float>
%dst318 = bitcast <2 x double> %234 to <4 x float>
%context.f_blend_color_ptr = getelementptr { [16 x float*], float, i32, i32,
i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x
i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }] }*
%context, i32 0, i32 5
%context.f_blend_color = load float** %context.f_blend_color_ptr
%235 = bitcast float* %context.f_blend_color to <4 x float>*
%236 = getelementptr <4 x float>* %235, i32 0
%237 = load <4 x float>* %236
%238 = shufflevector <4 x float> %237, <4 x float> undef, <4 x i32> <i32 3,
i32 3, i32 3, i32 3>
%239 = shufflevector <4 x float> %237, <4 x float> undef, <4 x i32> <i32 0,
i32 1, i32 2, i32 0>
%240 = bitcast <4 x i32> %mask to <2 x i64>
%241 = bitcast <4 x i32> %mask9 to <2 x i64>
%242 = shufflevector <2 x i64> %240, <2 x i64> %241, <2 x i32> <i32 0, i32 2>
%243 = shufflevector <2 x i64> %240, <2 x i64> %241, <2 x i32> <i32 1, i32 3>
%244 = bitcast <2 x i64> %242 to <4 x i32>
%245 = bitcast <2 x i64> %243 to <4 x i32>
%246 = extractelement <4 x i32> %245, i32 3
%247 = extractelement <4 x i32> %245, i32 2
%248 = extractelement <4 x i32> %245, i32 1
%249 = extractelement <4 x i32> %245, i32 0
%250 = extractelement <4 x i32> %244, i32 3
%251 = extractelement <4 x i32> %244, i32 2
%252 = extractelement <4 x i32> %244, i32 1
%253 = extractelement <4 x i32> %244, i32 0
%254 = sext i32 %253 to i96
%255 = bitcast i96 %254 to <3 x i32>
%256 = shufflevector <3 x i32> %255, <3 x i32> undef, <4 x i32> <i32 0, i32
1, i32 2, i32 3>
%257 = sext i32 %252 to i96
%258 = bitcast i96 %257 to <3 x i32>
%259 = shufflevector <3 x i32> %258, <3 x i32> undef, <4 x i32> <i32 0, i32
1, i32 2, i32 3>
%260 = sext i32 %251 to i96
%261 = bitcast i96 %260 to <3 x i32>
%262 = shufflevector <3 x i32> %261, <3 x i32> undef, <4 x i32> <i32 0, i32
1, i32 2, i32 3>
%263 = sext i32 %250 to i96
%264 = bitcast i96 %263 to <3 x i32>
%265 = shufflevector <3 x i32> %264, <3 x i32> undef, <4 x i32> <i32 0, i32
1, i32 2, i32 3>
%266 = sext i32 %249 to i96
%267 = bitcast i96 %266 to <3 x i32>
%268 = shufflevector <3 x i32> %267, <3 x i32> undef, <4 x i32> <i32 0, i32
1, i32 2, i32 3>
%269 = sext i32 %248 to i96
%270 = bitcast i96 %269 to <3 x i32>
%271 = shufflevector <3 x i32> %270, <3 x i32> undef, <4 x i32> <i32 0, i32
1, i32 2, i32 3>
%272 = sext i32 %247 to i96
%273 = bitcast i96 %272 to <3 x i32>
%274 = shufflevector <3 x i32> %273, <3 x i32> undef, <4 x i32> <i32 0, i32
1, i32 2, i32 3>
%275 = sext i32 %246 to i96
%276 = bitcast i96 %275 to <3 x i32>
%277 = shufflevector <3 x i32> %276, <3 x i32> undef, <4 x i32> <i32 0, i32
1, i32 2, i32 3>
%278 = bitcast <4 x float> %211 to <2 x i64>
%279 = bitcast <4 x float> %215 to <2 x i64>
%280 = shufflevector <2 x i64> %278, <2 x i64> %279, <2 x i32> <i32 0, i32 2>
%281 = shufflevector <2 x i64> %278, <2 x i64> %279, <2 x i32> <i32 1, i32 3>
%282 = bitcast <2 x i64> %280 to <4 x float>
%283 = bitcast <2 x i64> %281 to <4 x float>
%284 = extractelement <4 x float> %283, i32 3
%285 = extractelement <4 x float> %283, i32 2
%286 = extractelement <4 x float> %283, i32 1
%287 = extractelement <4 x float> %283, i32 0
%288 = extractelement <4 x float> %282, i32 3
%289 = extractelement <4 x float> %282, i32 2
%290 = extractelement <4 x float> %282, i32 1
%291 = extractelement <4 x float> %282, i32 0
%292 = insertelement <4 x float> undef, float %291, i32 0
%293 = shufflevector <4 x float> %292, <4 x float> undef, <4 x i32>
zeroinitializer
%294 = insertelement <4 x float> undef, float %290, i32 0
%295 = shufflevector <4 x float> %294, <4 x float> undef, <4 x i32>
zeroinitializer
%296 = insertelement <4 x float> undef, float %289, i32 0
%297 = shufflevector <4 x float> %296, <4 x float> undef, <4 x i32>
zeroinitializer
%298 = insertelement <4 x float> undef, float %288, i32 0
%299 = shufflevector <4 x float> %298, <4 x float> undef, <4 x i32>
zeroinitializer
%300 = insertelement <4 x float> undef, float %287, i32 0
%301 = shufflevector <4 x float> %300, <4 x float> undef, <4 x i32>
zeroinitializer
%302 = insertelement <4 x float> undef, float %286, i32 0
%303 = shufflevector <4 x float> %302, <4 x float> undef, <4 x i32>
zeroinitializer
%304 = insertelement <4 x float> undef, float %285, i32 0
%305 = shufflevector <4 x float> %304, <4 x float> undef, <4 x i32>
zeroinitializer
%306 = insertelement <4 x float> undef, float %284, i32 0
%307 = shufflevector <4 x float> %306, <4 x float> undef, <4 x i32>
zeroinitializer
%308 = mul i32 0, %209
%309 = add i32 0, %308
%310 = getelementptr <16 x i8>* %color_ptr0, i32 0, i32 %309
%311 = bitcast i8* %310 to <3 x i16>*
%312 = load <3 x i16>* %311, align 2
%313 = mul i32 0, %209
%314 = add i32 6, %313
%315 = getelementptr <16 x i8>* %color_ptr0, i32 0, i32 %314
%316 = bitcast i8* %315 to <3 x i16>*
%317 = load <3 x i16>* %316, align 2
%318 = mul i32 0, %209
%319 = add i32 12, %318
%320 = getelementptr <16 x i8>* %color_ptr0, i32 0, i32 %319
%321 = bitcast i8* %320 to <3 x i16>*
%322 = load <3 x i16>* %321, align 2
%323 = mul i32 0, %209
%324 = add i32 18, %323
%325 = getelementptr <16 x i8>* %color_ptr0, i32 0, i32 %324
%326 = bitcast i8* %325 to <3 x i16>*
%327 = load <3 x i16>* %326, align 2
%328 = shufflevector <3 x i16> %312, <3 x i16> undef, <4 x i32> <i32 0, i32
1, i32 2, i32 3>
%329 = shufflevector <3 x i16> %317, <3 x i16> undef, <4 x i32> <i32 0, i32
1, i32 2, i32 3>
%330 = shufflevector <3 x i16> %322, <3 x i16> undef, <4 x i32> <i32 0, i32
1, i32 2, i32 3>
%331 = shufflevector <3 x i16> %327, <3 x i16> undef, <4 x i32> <i32 0, i32
1, i32 2, i32 3>
%332 = zext <4 x i16> %328 to <4 x i32>
%333 = shl <4 x i32> %332, <i32 13, i32 13, i32 13, i32 13>
%334 = and <4 x i32> %333, <i32 268427264, i32 268427264, i32 268427264, i32
268427264>
%335 = bitcast <4 x i32> %334 to <4 x float>
%336 = fmul <4 x float> %335, <float 0x46F0000000000000, float
0x46F0000000000000, float 0x46F0000000000000, float 0x46F0000000000000>
%337 = fcmp uge <4 x float> %335, <float 0x39F0000000000000, float
0x39F0000000000000, float 0x39F0000000000000, float 0x39F0000000000000>
%338 = sext <4 x i1> %337 to <4 x i32>
%339 = bitcast <4 x float> %336 to <4 x i32>
%340 = and <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32
2139095040>, %338
%341 = or <4 x i32> %340, %339
%342 = shl <4 x i32> %333, <i32 3, i32 3, i32 3, i32 3>
%343 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32
-2147483648>, %342
%344 = or <4 x i32> %341, %343
%345 = bitcast <4 x i32> %344 to <4 x float>
%346 = zext <4 x i16> %329 to <4 x i32>
%347 = shl <4 x i32> %346, <i32 13, i32 13, i32 13, i32 13>
%348 = and <4 x i32> %347, <i32 268427264, i32 268427264, i32 268427264, i32
268427264>
%349 = bitcast <4 x i32> %348 to <4 x float>
%350 = fmul <4 x float> %349, <float 0x46F0000000000000, float
0x46F0000000000000, float 0x46F0000000000000, float 0x46F0000000000000>
%351 = fcmp uge <4 x float> %349, <float 0x39F0000000000000, float
0x39F0000000000000, float 0x39F0000000000000, float 0x39F0000000000000>
%352 = sext <4 x i1> %351 to <4 x i32>
%353 = bitcast <4 x float> %350 to <4 x i32>
%354 = and <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32
2139095040>, %352
%355 = or <4 x i32> %354, %353
%356 = shl <4 x i32> %347, <i32 3, i32 3, i32 3, i32 3>
%357 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32
-2147483648>, %356
%358 = or <4 x i32> %355, %357
%359 = bitcast <4 x i32> %358 to <4 x float>
%360 = zext <4 x i16> %330 to <4 x i32>
%361 = shl <4 x i32> %360, <i32 13, i32 13, i32 13, i32 13>
%362 = and <4 x i32> %361, <i32 268427264, i32 268427264, i32 268427264, i32
268427264>
%363 = bitcast <4 x i32> %362 to <4 x float>
%364 = fmul <4 x float> %363, <float 0x46F0000000000000, float
0x46F0000000000000, float 0x46F0000000000000, float 0x46F0000000000000>
%365 = fcmp uge <4 x float> %363, <float 0x39F0000000000000, float
0x39F0000000000000, float 0x39F0000000000000, float 0x39F0000000000000>
%366 = sext <4 x i1> %365 to <4 x i32>
%367 = bitcast <4 x float> %364 to <4 x i32>
%368 = and <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32
2139095040>, %366
%369 = or <4 x i32> %368, %367
%370 = shl <4 x i32> %361, <i32 3, i32 3, i32 3, i32 3>
%371 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32
-2147483648>, %370
%372 = or <4 x i32> %369, %371
%373 = bitcast <4 x i32> %372 to <4 x float>
%374 = zext <4 x i16> %331 to <4 x i32>
%375 = shl <4 x i32> %374, <i32 13, i32 13, i32 13, i32 13>
%376 = and <4 x i32> %375, <i32 268427264, i32 268427264, i32 268427264, i32
268427264>
%377 = bitcast <4 x i32> %376 to <4 x float>
%378 = fmul <4 x float> %377, <float 0x46F0000000000000, float
0x46F0000000000000, float 0x46F0000000000000, float 0x46F0000000000000>
%379 = fcmp uge <4 x float> %377, <float 0x39F0000000000000, float
0x39F0000000000000, float 0x39F0000000000000, float 0x39F0000000000000>
%380 = sext <4 x i1> %379 to <4 x i32>
%381 = bitcast <4 x float> %378 to <4 x i32>
%382 = and <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32
2139095040>, %380
%383 = or <4 x i32> %382, %381
%384 = shl <4 x i32> %375, <i32 3, i32 3, i32 3, i32 3>
%385 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32
-2147483648>, %384
%386 = or <4 x i32> %383, %385
%387 = bitcast <4 x i32> %386 to <4 x float>
%388 = bitcast <4 x i32> %256 to <4 x float>
%389 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %345, <4 x
float> %dst0, <4 x float> %388)
%390 = bitcast <4 x i32> %259 to <4 x float>
%391 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %359, <4 x
float> %dst1, <4 x float> %390)
%392 = bitcast <4 x i32> %262 to <4 x float>
%393 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %373, <4 x
float> %dst015, <4 x float> %392)
%394 = bitcast <4 x i32> %265 to <4 x float>
%395 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %387, <4 x
float> %dst116, <4 x float> %394)
%396 = bitcast <4 x float> %dst2 to <4 x i32>
%397 = and <4 x i32> %396, %268
%398 = xor <4 x i32> %268, <i32 -1, i32 -1, i32 -1, i32 -1>
%399 = and <4 x i32> zeroinitializer, %398
%400 = or <4 x i32> %397, %399
%401 = bitcast <4 x i32> %400 to <4 x float>
%402 = bitcast <4 x float> %dst3 to <4 x i32>
%403 = and <4 x i32> %402, %271
%404 = xor <4 x i32> %271, <i32 -1, i32 -1, i32 -1, i32 -1>
%405 = and <4 x i32> zeroinitializer, %404
%406 = or <4 x i32> %403, %405
%407 = bitcast <4 x i32> %406 to <4 x float>
%408 = bitcast <4 x float> %dst217 to <4 x i32>
%409 = and <4 x i32> %408, %274
%410 = xor <4 x i32> %274, <i32 -1, i32 -1, i32 -1, i32 -1>
%411 = and <4 x i32> zeroinitializer, %410
%412 = or <4 x i32> %409, %411
%413 = bitcast <4 x i32> %412 to <4 x float>
%414 = bitcast <4 x float> %dst318 to <4 x i32>
%415 = and <4 x i32> %414, %277
%416 = xor <4 x i32> %277, <i32 -1, i32 -1, i32 -1, i32 -1>
%417 = and <4 x i32> zeroinitializer, %416
%418 = or <4 x i32> %415, %417
%419 = bitcast <4 x i32> %418 to <4 x float>
%420 = bitcast <4 x float> %389 to <4 x i32>
%421 = bitcast <4 x float> %389 to <4 x i32>
%422 = and <4 x i32> %421, <i32 2147475456, i32 2147475456, i32 2147475456,
i32 2147475456>
%423 = bitcast <4 x i32> %422 to <4 x float>
%424 = fmul <4 x float> %423, <float 0x38F0000000000000, float
0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000>
%425 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %424, <4 x float>
<float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000,
float 0x39EFFC0000000000>)
%426 = bitcast <4 x float> %425 to <4 x i32>
%427 = bitcast <4 x float> %389 to <4 x i32>
%428 = and <4 x i32> %427, <i32 2147483647, i32 2147483647, i32 2147483647,
i32 2147483647>
%429 = bitcast <4 x i32> %428 to <4 x float>
%430 = bitcast <4 x float> %429 to <4 x i32>
%431 = icmp sgt <4 x i32> %430, <i32 2139095040, i32 2139095040, i32
2139095040, i32 2139095040>
%432 = sext <4 x i1> %431 to <4 x i32>
%433 = icmp eq <4 x i32> %430, <i32 2139095040, i32 2139095040, i32
2139095040, i32 2139095040>
%434 = sext <4 x i1> %433 to <4 x i32>
%435 = or <4 x i32> %432, %434
%436 = and <4 x i32> %432, <i32 4194304, i32 4194304, i32 4194304, i32
4194304>
%437 = or <4 x i32> <i32 260046848, i32 260046848, i32 260046848, i32
260046848>, %436
%438 = bitcast <4 x i32> %435 to <16 x i8>
%439 = bitcast <4 x i32> %437 to <16 x i8>
%440 = bitcast <4 x i32> %426 to <16 x i8>
%441 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %440, <16 x i8>
%439, <16 x i8> %438)
%442 = bitcast <16 x i8> %441 to <4 x i32>
%443 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32
-2147483648>, %420
%444 = lshr <4 x i32> %443, <i32 3, i32 3, i32 3, i32 3>
%445 = or <4 x i32> %444, %442
%446 = ashr <4 x i32> %445, <i32 13, i32 13, i32 13, i32 13>
%447 = trunc <4 x i32> %446 to <4 x i16>
%448 = bitcast <4 x float> %391 to <4 x i32>
%449 = bitcast <4 x float> %391 to <4 x i32>
%450 = and <4 x i32> %449, <i32 2147475456, i32 2147475456, i32 2147475456,
i32 2147475456>
%451 = bitcast <4 x i32> %450 to <4 x float>
%452 = fmul <4 x float> %451, <float 0x38F0000000000000, float
0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000>
%453 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %452, <4 x float>
<float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000,
float 0x39EFFC0000000000>)
%454 = bitcast <4 x float> %453 to <4 x i32>
%455 = bitcast <4 x float> %391 to <4 x i32>
%456 = and <4 x i32> %455, <i32 2147483647, i32 2147483647, i32 2147483647,
i32 2147483647>
%457 = bitcast <4 x i32> %456 to <4 x float>
%458 = bitcast <4 x float> %457 to <4 x i32>
%459 = icmp sgt <4 x i32> %458, <i32 2139095040, i32 2139095040, i32
2139095040, i32 2139095040>
%460 = sext <4 x i1> %459 to <4 x i32>
%461 = icmp eq <4 x i32> %458, <i32 2139095040, i32 2139095040, i32
2139095040, i32 2139095040>
%462 = sext <4 x i1> %461 to <4 x i32>
%463 = or <4 x i32> %460, %462
%464 = and <4 x i32> %460, <i32 4194304, i32 4194304, i32 4194304, i32
4194304>
%465 = or <4 x i32> <i32 260046848, i32 260046848, i32 260046848, i32
260046848>, %464
%466 = bitcast <4 x i32> %463 to <16 x i8>
%467 = bitcast <4 x i32> %465 to <16 x i8>
%468 = bitcast <4 x i32> %454 to <16 x i8>
%469 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %468, <16 x i8>
%467, <16 x i8> %466)
%470 = bitcast <16 x i8> %469 to <4 x i32>
%471 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32
-2147483648>, %448
%472 = lshr <4 x i32> %471, <i32 3, i32 3, i32 3, i32 3>
%473 = or <4 x i32> %472, %470
%474 = ashr <4 x i32> %473, <i32 13, i32 13, i32 13, i32 13>
%475 = trunc <4 x i32> %474 to <4 x i16>
%476 = bitcast <4 x float> %393 to <4 x i32>
%477 = bitcast <4 x float> %393 to <4 x i32>
%478 = and <4 x i32> %477, <i32 2147475456, i32 2147475456, i32 2147475456,
i32 2147475456>
%479 = bitcast <4 x i32> %478 to <4 x float>
%480 = fmul <4 x float> %479, <float 0x38F0000000000000, float
0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000>
%481 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %480, <4 x float>
<float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000,
float 0x39EFFC0000000000>)
%482 = bitcast <4 x float> %481 to <4 x i32>
%483 = bitcast <4 x float> %393 to <4 x i32>
%484 = and <4 x i32> %483, <i32 2147483647, i32 2147483647, i32 2147483647,
i32 2147483647>
%485 = bitcast <4 x i32> %484 to <4 x float>
%486 = bitcast <4 x float> %485 to <4 x i32>
%487 = icmp sgt <4 x i32> %486, <i32 2139095040, i32 2139095040, i32
2139095040, i32 2139095040>
%488 = sext <4 x i1> %487 to <4 x i32>
%489 = icmp eq <4 x i32> %486, <i32 2139095040, i32 2139095040, i32
2139095040, i32 2139095040>
%490 = sext <4 x i1> %489 to <4 x i32>
%491 = or <4 x i32> %488, %490
%492 = and <4 x i32> %488, <i32 4194304, i32 4194304, i32 4194304, i32
4194304>
%493 = or <4 x i32> <i32 260046848, i32 260046848, i32 260046848, i32
260046848>, %492
%494 = bitcast <4 x i32> %491 to <16 x i8>
%495 = bitcast <4 x i32> %493 to <16 x i8>
%496 = bitcast <4 x i32> %482 to <16 x i8>
%497 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %496, <16 x i8>
%495, <16 x i8> %494)
%498 = bitcast <16 x i8> %497 to <4 x i32>
%499 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32
-2147483648>, %476
%500 = lshr <4 x i32> %499, <i32 3, i32 3, i32 3, i32 3>
%501 = or <4 x i32> %500, %498
%502 = ashr <4 x i32> %501, <i32 13, i32 13, i32 13, i32 13>
%503 = trunc <4 x i32> %502 to <4 x i16>
%504 = bitcast <4 x float> %395 to <4 x i32>
%505 = bitcast <4 x float> %395 to <4 x i32>
%506 = and <4 x i32> %505, <i32 2147475456, i32 2147475456, i32 2147475456,
i32 2147475456>
%507 = bitcast <4 x i32> %506 to <4 x float>
%508 = fmul <4 x float> %507, <float 0x38F0000000000000, float
0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000>
%509 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %508, <4 x float>
<float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000,
float 0x39EFFC0000000000>)
%510 = bitcast <4 x float> %509 to <4 x i32>
%511 = bitcast <4 x float> %395 to <4 x i32>
%512 = and <4 x i32> %511, <i32 2147483647, i32 2147483647, i32 2147483647,
i32 2147483647>
%513 = bitcast <4 x i32> %512 to <4 x float>
%514 = bitcast <4 x float> %513 to <4 x i32>
%515 = icmp sgt <4 x i32> %514, <i32 2139095040, i32 2139095040, i32
2139095040, i32 2139095040>
%516 = sext <4 x i1> %515 to <4 x i32>
%517 = icmp eq <4 x i32> %514, <i32 2139095040, i32 2139095040, i32
2139095040, i32 2139095040>
%518 = sext <4 x i1> %517 to <4 x i32>
%519 = or <4 x i32> %516, %518
%520 = and <4 x i32> %516, <i32 4194304, i32 4194304, i32 4194304, i32
4194304>
%521 = or <4 x i32> <i32 260046848, i32 260046848, i32 260046848, i32
260046848>, %520
%522 = bitcast <4 x i32> %519 to <16 x i8>
%523 = bitcast <4 x i32> %521 to <16 x i8>
%524 = bitcast <4 x i32> %510 to <16 x i8>
%525 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %524, <16 x i8>
%523, <16 x i8> %522)
%526 = bitcast <16 x i8> %525 to <4 x i32>
%527 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32
-2147483648>, %504
%528 = lshr <4 x i32> %527, <i32 3, i32 3, i32 3, i32 3>
%529 = or <4 x i32> %528, %526
%530 = ashr <4 x i32> %529, <i32 13, i32 13, i32 13, i32 13>
%531 = trunc <4 x i32> %530 to <4 x i16>
%532 = bitcast <4 x float> %401 to <4 x i32>
%533 = bitcast <4 x float> %401 to <4 x i32>
%534 = and <4 x i32> %533, <i32 2147475456, i32 2147475456, i32 2147475456,
i32 2147475456>
%535 = bitcast <4 x i32> %534 to <4 x float>
%536 = fmul <4 x float> %535, <float 0x38F0000000000000, float
0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000>
%537 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %536, <4 x float>
<float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000,
float 0x39EFFC0000000000>)
%538 = bitcast <4 x float> %537 to <4 x i32>
%539 = bitcast <4 x float> %401 to <4 x i32>
%540 = and <4 x i32> %539, <i32 2147483647, i32 2147483647, i32 2147483647,
i32 2147483647>
%541 = bitcast <4 x i32> %540 to <4 x float>
%542 = bitcast <4 x float> %541 to <4 x i32>
%543 = icmp sgt <4 x i32> %542, <i32 2139095040, i32 2139095040, i32
2139095040, i32 2139095040>
%544 = sext <4 x i1> %543 to <4 x i32>
%545 = icmp eq <4 x i32> %542, <i32 2139095040, i32 2139095040, i32
2139095040, i32 2139095040>
%546 = sext <4 x i1> %545 to <4 x i32>
%547 = or <4 x i32> %544, %546
%548 = and <4 x i32> %544, <i32 4194304, i32 4194304, i32 4194304, i32
4194304>
%549 = or <4 x i32> <i32 260046848, i32 260046848, i32 260046848, i32
260046848>, %548
%550 = bitcast <4 x i32> %547 to <16 x i8>
%551 = bitcast <4 x i32> %549 to <16 x i8>
%552 = bitcast <4 x i32> %538 to <16 x i8>
%553 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %552, <16 x i8>
%551, <16 x i8> %550)
%554 = bitcast <16 x i8> %553 to <4 x i32>
%555 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32
-2147483648>, %532
%556 = lshr <4 x i32> %555, <i32 3, i32 3, i32 3, i32 3>
%557 = or <4 x i32> %556, %554
%558 = ashr <4 x i32> %557, <i32 13, i32 13, i32 13, i32 13>
%559 = trunc <4 x i32> %558 to <4 x i16>
%560 = bitcast <4 x float> %407 to <4 x i32>
%561 = bitcast <4 x float> %407 to <4 x i32>
%562 = and <4 x i32> %561, <i32 2147475456, i32 2147475456, i32 2147475456,
i32 2147475456>
%563 = bitcast <4 x i32> %562 to <4 x float>
%564 = fmul <4 x float> %563, <float 0x38F0000000000000, float
0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000>
%565 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %564, <4 x float>
<float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000,
float 0x39EFFC0000000000>)
%566 = bitcast <4 x float> %565 to <4 x i32>
%567 = bitcast <4 x float> %407 to <4 x i32>
%568 = and <4 x i32> %567, <i32 2147483647, i32 2147483647, i32 2147483647,
i32 2147483647>
%569 = bitcast <4 x i32> %568 to <4 x float>
%570 = bitcast <4 x float> %569 to <4 x i32>
%571 = icmp sgt <4 x i32> %570, <i32 2139095040, i32 2139095040, i32
2139095040, i32 2139095040>
%572 = sext <4 x i1> %571 to <4 x i32>
%573 = icmp eq <4 x i32> %570, <i32 2139095040, i32 2139095040, i32
2139095040, i32 2139095040>
%574 = sext <4 x i1> %573 to <4 x i32>
%575 = or <4 x i32> %572, %574
%576 = and <4 x i32> %572, <i32 4194304, i32 4194304, i32 4194304, i32
4194304>
%577 = or <4 x i32> <i32 260046848, i32 260046848, i32 260046848, i32
260046848>, %576
%578 = bitcast <4 x i32> %575 to <16 x i8>
%579 = bitcast <4 x i32> %577 to <16 x i8>
%580 = bitcast <4 x i32> %566 to <16 x i8>
%581 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %580, <16 x i8>
%579, <16 x i8> %578)
%582 = bitcast <16 x i8> %581 to <4 x i32>
%583 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32
-2147483648>, %560
%584 = lshr <4 x i32> %583, <i32 3, i32 3, i32 3, i32 3>
%585 = or <4 x i32> %584, %582
%586 = ashr <4 x i32> %585, <i32 13, i32 13, i32 13, i32 13>
%587 = trunc <4 x i32> %586 to <4 x i16>
%588 = bitcast <4 x float> %413 to <4 x i32>
%589 = bitcast <4 x float> %413 to <4 x i32>
%590 = and <4 x i32> %589, <i32 2147475456, i32 2147475456, i32 2147475456,
i32 2147475456>
%591 = bitcast <4 x i32> %590 to <4 x float>
%592 = fmul <4 x float> %591, <float 0x38F0000000000000, float
0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000>
%593 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %592, <4 x float>
<float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000,
float 0x39EFFC0000000000>)
%594 = bitcast <4 x float> %593 to <4 x i32>
%595 = bitcast <4 x float> %413 to <4 x i32>
%596 = and <4 x i32> %595, <i32 2147483647, i32 2147483647, i32 2147483647,
i32 2147483647>
%597 = bitcast <4 x i32> %596 to <4 x float>
%598 = bitcast <4 x float> %597 to <4 x i32>
%599 = icmp sgt <4 x i32> %598, <i32 2139095040, i32 2139095040, i32
2139095040, i32 2139095040>
%600 = sext <4 x i1> %599 to <4 x i32>
%601 = icmp eq <4 x i32> %598, <i32 2139095040, i32 2139095040, i32
2139095040, i32 2139095040>
%602 = sext <4 x i1> %601 to <4 x i32>
%603 = or <4 x i32> %600, %602
%604 = and <4 x i32> %600, <i32 4194304, i32 4194304, i32 4194304, i32
4194304>
%605 = or <4 x i32> <i32 260046848, i32 260046848, i32 260046848, i32
260046848>, %604
%606 = bitcast <4 x i32> %603 to <16 x i8>
%607 = bitcast <4 x i32> %605 to <16 x i8>
%608 = bitcast <4 x i32> %594 to <16 x i8>
%609 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %608, <16 x i8>
%607, <16 x i8> %606)
%610 = bitcast <16 x i8> %609 to <4 x i32>
%611 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32
-2147483648>, %588
%612 = lshr <4 x i32> %611, <i32 3, i32 3, i32 3, i32 3>
%613 = or <4 x i32> %612, %610
%614 = ashr <4 x i32> %613, <i32 13, i32 13, i32 13, i32 13>
%615 = trunc <4 x i32> %614 to <4 x i16>
%616 = bitcast <4 x float> %419 to <4 x i32>
%617 = bitcast <4 x float> %419 to <4 x i32>
%618 = and <4 x i32> %617, <i32 2147475456, i32 2147475456, i32 2147475456,
i32 2147475456>
%619 = bitcast <4 x i32> %618 to <4 x float>
%620 = fmul <4 x float> %619, <float 0x38F0000000000000, float
0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000>
%621 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %620, <4 x float>
<float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000,
float 0x39EFFC0000000000>)
%622 = bitcast <4 x float> %621 to <4 x i32>
%623 = bitcast <4 x float> %419 to <4 x i32>
%624 = and <4 x i32> %623, <i32 2147483647, i32 2147483647, i32 2147483647,
i32 2147483647>
%625 = bitcast <4 x i32> %624 to <4 x float>
%626 = bitcast <4 x float> %625 to <4 x i32>
%627 = icmp sgt <4 x i32> %626, <i32 2139095040, i32 2139095040, i32
2139095040, i32 2139095040>
%628 = sext <4 x i1> %627 to <4 x i32>
%629 = icmp eq <4 x i32> %626, <i32 2139095040, i32 2139095040, i32
2139095040, i32 2139095040>
%630 = sext <4 x i1> %629 to <4 x i32>
%631 = or <4 x i32> %628, %630
%632 = and <4 x i32> %628, <i32 4194304, i32 4194304, i32 4194304, i32
4194304>
%633 = or <4 x i32> <i32 260046848, i32 260046848, i32 260046848, i32
260046848>, %632
%634 = bitcast <4 x i32> %631 to <16 x i8>
%635 = bitcast <4 x i32> %633 to <16 x i8>
%636 = bitcast <4 x i32> %622 to <16 x i8>
%637 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %636, <16 x i8>
%635, <16 x i8> %634)
%638 = bitcast <16 x i8> %637 to <4 x i32>
%639 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32
-2147483648>, %616
%640 = lshr <4 x i32> %639, <i32 3, i32 3, i32 3, i32 3>
%641 = or <4 x i32> %640, %638
%642 = ashr <4 x i32> %641, <i32 13, i32 13, i32 13, i32 13>
%643 = trunc <4 x i32> %642 to <4 x i16>
%644 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> zeroinitializer, <4
x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float
0x39EFFC0000000000, float 0x39EFFC0000000000>)
%645 = bitcast <4 x float> %644 to <4 x i32>
%646 = and <4 x i32> %645, <i32 -1, i32 -1, i32 -1, i32 -1>
%647 = or <4 x i32> zeroinitializer, %646
%648 = or <4 x i32> zeroinitializer, %647
%649 = ashr <4 x i32> %648, <i32 13, i32 13, i32 13, i32 13>
%650 = trunc <4 x i32> %649 to <4 x i16>
%651 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> zeroinitializer, <4
x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float
0x39EFFC0000000000, float 0x39EFFC0000000000>)
%652 = bitcast <4 x float> %651 to <4 x i32>
%653 = and <4 x i32> %652, <i32 -1, i32 -1, i32 -1, i32 -1>
%654 = or <4 x i32> zeroinitializer, %653
%655 = or <4 x i32> zeroinitializer, %654
%656 = ashr <4 x i32> %655, <i32 13, i32 13, i32 13, i32 13>
%657 = trunc <4 x i32> %656 to <4 x i16>
%658 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> zeroinitializer, <4
x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float
0x39EFFC0000000000, float 0x39EFFC0000000000>)
%659 = bitcast <4 x float> %658 to <4 x i32>
%660 = and <4 x i32> %659, <i32 -1, i32 -1, i32 -1, i32 -1>
%661 = or <4 x i32> zeroinitializer, %660
%662 = or <4 x i32> zeroinitializer, %661
%663 = ashr <4 x i32> %662, <i32 13, i32 13, i32 13, i32 13>
%664 = trunc <4 x i32> %663 to <4 x i16>
%665 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> zeroinitializer, <4
x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float
0x39EFFC0000000000, float 0x39EFFC0000000000>)
%666 = bitcast <4 x float> %665 to <4 x i32>
%667 = and <4 x i32> %666, <i32 -1, i32 -1, i32 -1, i32 -1>
%668 = or <4 x i32> zeroinitializer, %667
%669 = or <4 x i32> zeroinitializer, %668
%670 = ashr <4 x i32> %669, <i32 13, i32 13, i32 13, i32 13>
%671 = trunc <4 x i32> %670 to <4 x i16>
%672 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> zeroinitializer, <4
x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float
0x39EFFC0000000000, float 0x39EFFC0000000000>)
%673 = bitcast <4 x float> %672 to <4 x i32>
%674 = and <4 x i32> %673, <i32 -1, i32 -1, i32 -1, i32 -1>
%675 = or <4 x i32> zeroinitializer, %674
%676 = or <4 x i32> zeroinitializer, %675
%677 = ashr <4 x i32> %676, <i32 13, i32 13, i32 13, i32 13>
%678 = trunc <4 x i32> %677 to <4 x i16>
%679 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> zeroinitializer, <4
x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float
0x39EFFC0000000000, float 0x39EFFC0000000000>)
%680 = bitcast <4 x float> %679 to <4 x i32>
%681 = and <4 x i32> %680, <i32 -1, i32 -1, i32 -1, i32 -1>
%682 = or <4 x i32> zeroinitializer, %681
%683 = or <4 x i32> zeroinitializer, %682
%684 = ashr <4 x i32> %683, <i32 13, i32 13, i32 13, i32 13>
%685 = trunc <4 x i32> %684 to <4 x i16>
%686 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> zeroinitializer, <4
x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float
0x39EFFC0000000000, float 0x39EFFC0000000000>)
%687 = bitcast <4 x float> %686 to <4 x i32>
%688 = and <4 x i32> %687, <i32 -1, i32 -1, i32 -1, i32 -1>
%689 = or <4 x i32> zeroinitializer, %688
%690 = or <4 x i32> zeroinitializer, %689
%691 = ashr <4 x i32> %690, <i32 13, i32 13, i32 13, i32 13>
%692 = trunc <4 x i32> %691 to <4 x i16>
%693 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> zeroinitializer, <4
x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float
0x39EFFC0000000000, float 0x39EFFC0000000000>)
%694 = bitcast <4 x float> %693 to <4 x i32>
%695 = and <4 x i32> %694, <i32 -1, i32 -1, i32 -1, i32 -1>
%696 = or <4 x i32> zeroinitializer, %695
%697 = or <4 x i32> zeroinitializer, %696
%698 = ashr <4 x i32> %697, <i32 13, i32 13, i32 13, i32 13>
%699 = trunc <4 x i32> %698 to <4 x i16>
%700 = shufflevector <4 x i16> %447, <4 x i16> %447, <3 x i32> <i32 0, i32 1,
i32 2>
%701 = shufflevector <4 x i16> %475, <4 x i16> %475, <3 x i32> <i32 0, i32 1,
i32 2>
%702 = shufflevector <4 x i16> %503, <4 x i16> %503, <3 x i32> <i32 0, i32 1,
i32 2>
%703 = shufflevector <4 x i16> %531, <4 x i16> %531, <3 x i32> <i32 0, i32 1,
i32 2>
%704 = shufflevector <4 x i16> %559, <4 x i16> %559, <3 x i32> <i32 0, i32 1,
i32 2>
%705 = shufflevector <4 x i16> %587, <4 x i16> %587, <3 x i32> <i32 0, i32 1,
i32 2>
%706 = shufflevector <4 x i16> %615, <4 x i16> %615, <3 x i32> <i32 0, i32 1,
i32 2>
%707 = shufflevector <4 x i16> %643, <4 x i16> %643, <3 x i32> <i32 0, i32 1,
i32 2>
%708 = shufflevector <4 x i16> %650, <4 x i16> %650, <3 x i32> <i32 0, i32 1,
i32 2>
%709 = shufflevector <4 x i16> %657, <4 x i16> %657, <3 x i32> <i32 0, i32 1,
i32 2>
%710 = shufflevector <4 x i16> %664, <4 x i16> %664, <3 x i32> <i32 0, i32 1,
i32 2>
%711 = shufflevector <4 x i16> %671, <4 x i16> %671, <3 x i32> <i32 0, i32 1,
i32 2>
%712 = shufflevector <4 x i16> %678, <4 x i16> %678, <3 x i32> <i32 0, i32 1,
i32 2>
%713 = shufflevector <4 x i16> %685, <4 x i16> %685, <3 x i32> <i32 0, i32 1,
i32 2>
%714 = shufflevector <4 x i16> %692, <4 x i16> %692, <3 x i32> <i32 0, i32 1,
i32 2>
%715 = shufflevector <4 x i16> %699, <4 x i16> %699, <3 x i32> <i32 0, i32 1,
i32 2>
%716 = mul i32 0, %209
%717 = add i32 0, %716
%718 = getelementptr <16 x i8>* %color_ptr0, i32 0, i32 %717
%719 = bitcast i8* %718 to <3 x i16>*
store <3 x i16> %700, <3 x i16>* %719, align 2
%720 = mul i32 0, %209
%721 = add i32 6, %720
%722 = getelementptr <16 x i8>* %color_ptr0, i32 0, i32 %721
%723 = bitcast i8* %722 to <3 x i16>*
store <3 x i16> %701, <3 x i16>* %723, align 2
%724 = mul i32 0, %209
%725 = add i32 12, %724
%726 = getelementptr <16 x i8>* %color_ptr0, i32 0, i32 %725
%727 = bitcast i8* %726 to <3 x i16>*
store <3 x i16> %702, <3 x i16>* %727, align 2
%728 = mul i32 0, %209
%729 = add i32 18, %728
%730 = getelementptr <16 x i8>* %color_ptr0, i32 0, i32 %729
%731 = bitcast i8* %730 to <3 x i16>*
store <3 x i16> %703, <3 x i16>* %731, align 2
call void @llvm.x86.sse.ldmxcsr(i32* %mxcsr_ptr)
ret void
}
src/gallium/auxiliary/gallivm/lp_bld_init.c:605:gallivm_verify_function:
Assertion `0' failed.
(gdb) bt
#0 0x00007f1c5f7df609 in _debug_assert_fail (expr=0x7f1c602c8088 "0",
file=0x7f1c602c7fd0 "src/gallium/auxiliary/gallivm/lp_bld_init.c", line=605,
function=0x7f1c602c8130 <__func__.8132> "gallivm_verify_function") at
src/gallium/auxiliary/util/u_debug.c:278
#1 0x00007f1c5f86719a in gallivm_verify_function (gallivm=0x15a7560,
func=0x1511a80) at src/gallium/auxiliary/gallivm/lp_bld_init.c:605
#2 0x00007f1c5f44000c in generate_fragment (lp=0x9c08b0, shader=0xa0a2a0,
variant=0x161ff80, partial_mask=1) at
src/gallium/drivers/llvmpipe/lp_state_fs.c:2373
#3 0x00007f1c5f440b15 in generate_variant (lp=0x9c08b0, shader=0xa0a2a0,
key=0x7fff901955d0) at src/gallium/drivers/llvmpipe/lp_state_fs.c:2542
#4 0x00007f1c5f4420f9 in llvmpipe_update_fs (lp=0x9c08b0) at
src/gallium/drivers/llvmpipe/lp_state_fs.c:3089
#5 0x00007f1c5f438549 in llvmpipe_update_derived (llvmpipe=0x9c08b0) at
src/gallium/drivers/llvmpipe/lp_state_derived.c:186
#6 0x00007f1c5f412dfc in llvmpipe_draw_vbo (pipe=0x9c08b0,
info=0x7fff90195870) at src/gallium/drivers/llvmpipe/lp_draw_arrays.c:64
#7 0x00007f1c5f7e267b in util_draw_arrays_instanced (pipe=0x9c08b0, mode=6,
start=0, count=4, start_instance=0, instance_count=1)
at src/gallium/auxiliary/util/u_draw.h:99
#8 0x00007f1c5f7e5066 in blitter_draw (ctx=0x9fa570, x1=0, y1=0, x2=16, y2=1,
depth=0, num_instances=1) at src/gallium/auxiliary/util/u_blitter.c:941
#9 0x00007f1c5f7e510f in util_blitter_draw_rectangle (blitter=0x9fa570, x1=0,
y1=0, x2=16, y2=1, depth=0, type=UTIL_BLITTER_ATTRIB_TEXCOORD,
attrib=0x7fff90195a40) at src/gallium/auxiliary/util/u_blitter.c:965
#10 0x00007f1c5f7e6631 in util_blitter_blit_generic (blitter=0x9fa570,
dst=0x168df10, dstbox=0x7fff90195c0c, src=0x1535a80, srcbox=0x7fff90195c34,
src_width0=384, src_height0=16, mask=15, filter=0, scissor=0x0,
copy_all_samples=1 '\001') at src/gallium/auxiliary/util/u_blitter.c:1396
#11 0x00007f1c5f7e6c1e in util_blitter_blit (blitter=0x9fa570,
info=0x7fff90195c00) at src/gallium/auxiliary/util/u_blitter.c:1489
#12 0x00007f1c5f44997b in lp_blit (pipe=0x9c08b0, blit_info=0x7fff90195d30) at
src/gallium/drivers/llvmpipe/lp_surface.c:232
#13 0x00007f1c5f5ab93a in st_CopyTexSubImage (ctx=0x7f1c558e8010, dims=1,
texImage=0xc7da40, destX=0, destY=0, slice=0, rb=0xa48510, srcX=0, srcY=0,
width=16,
height=1) at src/mesa/state_tracker/st_cb_texture.c:1374
#14 0x00007f1c5f543ae3 in copytexsubimage_by_slice (ctx=0x7f1c558e8010,
texImage=0xc7da40, dims=1, xoffset=0, yoffset=0, zoffset=0, rb=0xa48510, x=0,
y=0,
width=16, height=1) at src/mesa/main/teximage.c:3423
#15 0x00007f1c5f543f7c in copyteximage (ctx=0x7f1c558e8010, dims=1,
target=3552, level=0, internalFormat=34843, x=0, y=0, width=16, height=1,
border=0)
at src/mesa/main/teximage.c:3516
#16 0x00007f1c5f544065 in _mesa_CopyTexImage1D (target=3552, level=0,
internalFormat=34843, x=0, y=0, width=16, border=0) at
src/mesa/main/teximage.c:3541
#17 0x000000000040215a in test_target_and_format (x=80, y=0, target=3552,
format=34843, expected=0x4030e0 <fcolor+32>)
at piglit/tests/texturing/copyteximage.c:393
#18 0x0000000000402e25 in piglit_display () at
piglit/tests/texturing/copyteximage.c:581
#19 0x00007f1c62afb6c8 in display () at
piglit/tests/util/piglit-framework-gl/piglit_glut_framework.c:60
#20 0x00007f1c62292244 in fghRedrawWindow (window=0x9bce00) at
freeglut_main.c:231
#21 fghcbDisplayWindow (window=0x9bce00, enumerator=0x7fff901960a0) at
freeglut_main.c:248
#22 0x00007f1c62295aa9 in fgEnumWindows
(enumCallback=enumCallback@entry=0x7f1c622921d0 <fghcbDisplayWindow>,
enumerator=enumerator@entry=0x7fff901960a0)
at freeglut_structure.c:396
#23 0x00007f1c622927fd in fghDisplayAll () at freeglut_main.c:271
#24 glutMainLoopEvent () at freeglut_main.c:1523
#25 0x00007f1c62292ffd in glutMainLoop () at freeglut_main.c:1571
#26 0x00007f1c62afb8f7 in run_test (gl_fw=0x7f1c62de9340 <glut_fw>, argc=2,
argv=0x7fff90196478)
at piglit/tests/util/piglit-framework-gl/piglit_glut_framework.c:142
#27 0x00007f1c62af96c9 in piglit_gl_test_run (argc=2, argv=0x7fff90196478,
config=0x7fff90196340)
at piglit/tests/util/piglit-framework-gl.c:191
#28 0x000000000040175e in main (argc=2, argv=0x7fff90196478) at
piglit/tests/texturing/copyteximage.c:121
(gdb) frame 1
#1 0x00007f1c5f86719a in gallivm_verify_function (gallivm=0x15a7560,
func=0x1511a80) at src/gallium/auxiliary/gallivm/lp_bld_init.c:605
605 assert(0);
(gdb) l
600 {
601 /* Verify the LLVM IR. If invalid, dump and abort */
602 #ifdef DEBUG
603 if (LLVMVerifyFunction(func, LLVMPrintMessageAction)) {
604 lp_debug_dump_value(func);
605 assert(0);
606 return;
607 }
608 #endif
609
155139059ba588da1161eaa692515cacdead9f4e is the first bad commit
commit 155139059ba588da1161eaa692515cacdead9f4e
Author: Zack Rusin <zackr at vmware.com>
Date: Fri Dec 6 01:28:25 2013 -0500
llvmpipe: fix blending with half-float formats
The fact that we flush denorms to zero breaks our half-float
conversion and blending. This patch enables denorms for
blending. It's a little tricky due to the llvm bug that makes
it incorrectly reorder the mxcsr intrinsics:
http://llvm.org/bugs/show_bug.cgi?id=6393
Signed-off-by: Zack Rusin <zackr at vmware.com>
Reviewed-by: José Fonseca <jfonseca at vmware.com>
Reviewed-by: Roland Scheidegger <sroland at vmware.com>
Signed-off-by: Zack Rusin <zackr at vmware.com>
:040000 040000 5564b14ae9b7870ae8f4845f2069878c0752d5bc
84eb2a43a71956a57817e373117c2e2952a058d2 M src
bisect run success
--
You are receiving this mail because:
You are the assignee for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20131211/b3cd3105/attachment-0001.html>
More information about the mesa-dev
mailing list