[Piglit] [PATCH] cl: Add tests for function calls
Jan Vesely
jan.vesely at rutgers.edu
Tue Sep 19 05:17:31 UTC 2017
On Mon, 2017-09-18 at 18:53 -0700, Matt Arsenault wrote:
> Passes on ROCm, I haven't tried clover recently. Last
> time I did it errored because the AsmParser wasn't properly
> initialized.
>
> v2: Fix non-unique test names, Wrap noinline in unguarded macro,
> use prettier test names, use device_regex (effectively restricting to ROCm)
thanks.
Reviewed-by: Jan Vesely <jan.vesely at rutgers.edu>
I want to run this on my carrizo machine before pushing, should be
~wednesday.
Jan
> ---
> tests/cl/program/execute/call-clobbers-amdgcn.cl | 68 +++
> tests/cl/program/execute/calls-struct.cl | 179 +++++++
> tests/cl/program/execute/calls-workitem-id.cl | 77 +++
> tests/cl/program/execute/calls.cl | 607 +++++++++++++++++++++++
> tests/cl/program/execute/tail-calls.cl | 305 ++++++++++++
> 5 files changed, 1236 insertions(+)
> create mode 100644 tests/cl/program/execute/call-clobbers-amdgcn.cl
> create mode 100644 tests/cl/program/execute/calls-struct.cl
> create mode 100644 tests/cl/program/execute/calls-workitem-id.cl
> create mode 100644 tests/cl/program/execute/calls.cl
> create mode 100644 tests/cl/program/execute/tail-calls.cl
>
> diff --git a/tests/cl/program/execute/call-clobbers-amdgcn.cl b/tests/cl/program/execute/call-clobbers-amdgcn.cl
> new file mode 100644
> index 000000000..400771795
> --- /dev/null
> +++ b/tests/cl/program/execute/call-clobbers-amdgcn.cl
> @@ -0,0 +1,68 @@
> +/*!
> +
> +[config]
> +name: amdgcn call clobbers
> +clc_version_min: 10
> +device_regex: gfx[\d]*
> +
> +[test]
> +name: callee saved sgpr
> +kernel_name: call_clobber_s40
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 0xabcd1234
> +
> +[test]
> +name: callee saved vgpr
> +kernel_name: call_clobber_v40
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 0xabcd1234
> +
> +!*/
> +
> +#ifndef __AMDGCN__
> +#error This test is only for amdgcn
> +#endif
> +
> +__attribute__((noinline))
> +void clobber_s40()
> +{
> + __asm volatile("s_mov_b32 s40, 0xdead" : : : "s40");
> +}
> +
> +kernel void call_clobber_s40(__global int* ret)
> +{
> + __asm volatile("s_mov_b32 s40, 0xabcd1234" : : : "s40");
> +
> + clobber_s40();
> +
> + int tmp;
> +
> + __asm volatile("v_mov_b32 %0, s40"
> + : "=v"(tmp)
> + :
> + : "s40");
> + *ret = tmp;
> +}
> +
> +__attribute__((noinline))
> +void clobber_v40()
> +{
> + __asm volatile("v_mov_b32 v40, 0xdead" : : : "v40");
> +}
> +
> +kernel void call_clobber_v40(__global int* ret)
> +{
> + __asm volatile("v_mov_b32 v40, 0xabcd1234" : : : "v40");
> +
> + clobber_v40();
> +
> + int tmp;
> + __asm volatile("v_mov_b32 %0, v40"
> + : "=v"(tmp)
> + :
> + : "v40");
> + *ret = tmp;
> +}
> +
> diff --git a/tests/cl/program/execute/calls-struct.cl b/tests/cl/program/execute/calls-struct.cl
> new file mode 100644
> index 000000000..04f769dac
> --- /dev/null
> +++ b/tests/cl/program/execute/calls-struct.cl
> @@ -0,0 +1,179 @@
> +/*!
> +
> +[config]
> +name: calls with structs
> +clc_version_min: 10
> +
> +[test]
> +name: byval struct
> +kernel_name: call_i32_func_byval_Char_IntArray
> +dimensions: 1
> +global_size: 16 0 0
> +
> +arg_out: 0 buffer int[16] \
> + 1021 1022 1023 1024 1025 1026 1027 1028 \
> + 1029 1030 1031 1032 1033 1034 1035 1036
> +
> +arg_out: 1 buffer int[16] \
> + 14 14 14 14 \
> + 14 14 14 14 \
> + 14 14 14 14 \
> + 14 14 14 14 \
> +
> +arg_in: 2 buffer int[16] \
> + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
> +
> +
> +[test]
> +name: sret struct
> +kernel_name: call_sret_Char_IntArray_func
> +dimensions: 1
> +global_size: 16 0 0
> +
> +arg_out: 0 buffer int[16] \
> + 921 922 923 924 925 926 927 928 \
> + 929 930 931 932 933 934 935 936
> +
> +arg_in: 1 buffer int[16] \
> + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
> +
> +
> +[test]
> +name: byval struct and sret struct
> +kernel_name: call_sret_Char_IntArray_func_byval_Char_IntArray
> +dimensions: 1
> +global_size: 16 0 0
> +
> +arg_out: 0 buffer int[16] \
> + 86 87 88 89 \
> + 90 91 92 93 \
> + 94 95 96 97 \
> + 98 99 100 101
> +
> +arg_out: 1 buffer int[16] \
> + 134 135 136 137 \
> + 138 139 140 141 \
> + 142 143 144 145 \
> + 146 147 148 149
> +
> +arg_in: 2 buffer int[16] \
> + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
> +
> +!*/
> +
> +#define NOINLINE __attribute__((noinline))
> +
> +typedef struct ByVal_Char_IntArray {
> + char c;
> + int i[4];
> +} ByVal_Char_IntArray;
> +
> +NOINLINE
> +int i32_func_byval_Char_IntArray(ByVal_Char_IntArray st)
> +{
> + st.i[0] += 100;
> +
> + int sum = 0;
> + for (int i = 0; i < 4; ++i)
> + {
> + sum += st.i[i];
> + }
> +
> + sum += st.c;
> + return sum;
> +}
> +
> +kernel void call_i32_func_byval_Char_IntArray(global int* out0,
> + global int* out1,
> + global int* input)
> +{
> + ByVal_Char_IntArray st;
> + st.c = 15;
> +
> + int id = get_global_id(0);
> +
> + int val = input[id];
> + st.i[0] = 14;
> + st.i[1] = -8;
> + st.i[2] = val;
> + st.i[3] = 900;
> +
> + int result = i32_func_byval_Char_IntArray(st);
> + out0[id] = result;
> + out1[id] = st.i[0];
> +}
> +
> +NOINLINE
> +ByVal_Char_IntArray sret_Char_IntArray_func(global int* input, int id)
> +{
> + ByVal_Char_IntArray st;
> + st.c = 15;
> +
> + int val = input[id];
> + st.i[0] = 14;
> + st.i[1] = -8;
> + st.i[2] = val;
> + st.i[3] = 900;
> +
> + return st;
> +}
> +
> +kernel void call_sret_Char_IntArray_func(global int* output, global int* input)
> +{
> + int id = get_global_id(0);
> + ByVal_Char_IntArray st = sret_Char_IntArray_func(input, id);
> +
> + int sum = 0;
> + for (int i = 0; i < 4; ++i)
> + {
> + sum += st.i[i];
> + }
> +
> + sum += st.c;
> + output[id] = sum;
> +}
> +
> +NOINLINE
> +ByVal_Char_IntArray sret_Char_IntArray_func_byval_Char_IntArray(ByVal_Char_IntArray st)
> +{
> + st.c += 15;
> +
> + st.i[0] += 14;
> + st.i[1] -= 8;
> + st.i[2] += 9;
> + st.i[3] += 18;
> +
> + return st;
> +}
> +
> +kernel void call_sret_Char_IntArray_func_byval_Char_IntArray(global int* output0,
> + global int* output1,
> + global int* input)
> +{
> + int id = get_global_id(0);
> +
> + volatile ByVal_Char_IntArray st0;
> + st0.c = -20;
> +
> + int val = input[id];
> + st0.i[0] = 14;
> + st0.i[1] = -8;
> + st0.i[2] = val;
> + st0.i[3] = 100;
> +
> + ByVal_Char_IntArray st1 = sret_Char_IntArray_func_byval_Char_IntArray(st0);
> +
> + int sum0 = 0;
> + int sum1 = 0;
> + for (int i = 0; i < 4; ++i)
> + {
> + sum0 += st0.i[i];
> + sum1 += st1.i[i];
> + }
> +
> + sum0 += st0.c;
> + sum1 += st1.c;
> +
> + output0[id] = sum0;
> + output1[id] = sum1;
> +}
> diff --git a/tests/cl/program/execute/calls-workitem-id.cl b/tests/cl/program/execute/calls-workitem-id.cl
> new file mode 100644
> index 000000000..7edfad7e9
> --- /dev/null
> +++ b/tests/cl/program/execute/calls-workitem-id.cl
> @@ -0,0 +1,77 @@
> +/*!
> +
> +[config]
> +name: calls workitem IDs
> +clc_version_min: 10
> +
> +[test]
> +name: Callee function use get_global_id(0)
> +kernel_name: kernel_call_pass_get_global_id_0
> +dimensions: 1
> +global_size: 64 0 0
> +arg_out: 0 buffer uint[64] \
> + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 \
> + 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 \
> + 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 \
> + 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
> +
> +[test]
> +name: Callee function use get_global_id 0..2
> +kernel_name: kernel_call_pass_get_global_id_012
> +dimensions: 3
> +global_size: 8 4 2
> +arg_out: 0 buffer uint[64] \
> + 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 \
> + 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 \
> + 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 \
> + 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
> +
> +arg_out: 1 buffer uint[64] \
> + 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 \
> + 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 \
> + 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 \
> + 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3
> +
> +arg_out: 2 buffer uint[64] \
> + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \
> + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \
> + 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \
> + 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
> +
> +!*/
> +
> +#define NOINLINE __attribute__((noinline))
> +
> +NOINLINE
> +void func_get_global_id_0(volatile global uint* out)
> +{
> + uint gid = get_global_id(0);
> + out[gid] = gid;
> +}
> +
> +kernel void kernel_call_pass_get_global_id_0(global uint *out)
> +{
> + func_get_global_id_0(out);
> +}
> +
> +NOINLINE
> +void func_get_global_id_012(volatile global uint* out0,
> + volatile global uint* out1,
> + volatile global uint* out2)
> +{
> + uint id0 = get_global_id(0);
> + uint id1 = get_global_id(1);
> + uint id2 = get_global_id(2);
> + uint flat_id = (id2 * get_global_size(1) + id1) * get_global_size(0) + id0;
> +
> + out0[flat_id] = id0;
> + out1[flat_id] = id1;
> + out2[flat_id] = id2;
> +}
> +
> +kernel void kernel_call_pass_get_global_id_012(global uint *out0,
> + global uint *out1,
> + global uint *out2)
> +{
> + func_get_global_id_012(out0, out1, out2);
> +}
> diff --git a/tests/cl/program/execute/calls.cl b/tests/cl/program/execute/calls.cl
> new file mode 100644
> index 000000000..f4f55be31
> --- /dev/null
> +++ b/tests/cl/program/execute/calls.cl
> @@ -0,0 +1,607 @@
> +/*!
> +
> +[config]
> +name: calls
> +clc_version_min: 10
> +
> +[test]
> +name: Call void_func_void
> +kernel_name: call_void_func_void
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 12345
> +
> +[test]
> +name: Call i32_func_void
> +kernel_name: call_i32_func_void
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 0x12345
> +
> +[test]
> +name: Call i64_func_void
> +kernel_name: call_i64_func_void
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer long[1] 0x100000000000
> +
> +
> +[test]
> +name: Call call_i32_func_void_callee_stack
> +kernel_name: call_i32_func_void_callee_stack
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 290
> +
> +[test]
> +name: Call call_i32_func_p0i32_i32_caller_stack
> +kernel_name: call_i32_func_p0i32_i32_caller_stack
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 175
> +
> +[test]
> +name: Call call_i32_func_p0i32_i32_indirect_kernel_stack
> +kernel_name: call_i32_func_p0i32_i32_indirect_kernel_stack
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 241
> +
> +[test]
> +name: Call call_i32_func_p0i32_i32_indirect_function_stack
> +kernel_name: call_i32_func_p0i32_i32_indirect_function_stack
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 291
> +
> +[test]
> +name: callee stack corruption
> +kernel_name: kernel_call_nested_stack_usage
> +dimensions: 1
> +global_size: 10 0 0
> +
> +arg_out: 0 buffer int4[10] \
> + 53 48 156 160 \
> + 84 248 102 150 \
> + 102 56 217 106 \
> + 100 123 151 139 \
> + 80 150 135 163 \
> + 223 99 117 199 \
> + 187 262 223 169 \
> + 277 129 73 121 \
> + 162 165 138 137 \
> + 204 207 223 145 \
> +
> +
> +arg_in: 1 buffer int4[10] \
> + 0 13 76 46 \
> + 4 74 33 63 \
> + 26 9 95 7 \
> + 41 54 47 29 \
> + 15 68 38 39 \
> + 91 43 14 95 \
> + 44 83 69 70 \
> + 89 54 14 45 \
> + 77 63 21 21 \
> + 64 70 80 70
> +
> +arg_in: 2 buffer int4[10] \
> + 53 22 4 68 \
> + 76 100 36 24 \
> + 50 38 27 92 \
> + 18 15 57 81 \
> + 50 14 59 85 \
> + 41 13 89 9 \
> + 99 96 85 29 \
> + 99 21 45 31 \
> + 8 39 96 95 \
> + 76 67 63 5
> +
> +[test]
> +name: nested calls
> +kernel_name: kernel_nested_calls
> +dimensions: 1
> +global_size: 4 0 0
> +
> +arg_out: 0 buffer int[4] \
> + 1 7 155 -4
> +
> +arg_in: 1 buffer int[4] \
> + 0 100 1234 -912
> +
> +arg_in: 2 buffer int[4] \
> + 1 4 2 45
> +
> +
> +[test]
> +name: Kernel call stack argument
> +kernel_name: kernel_call_stack_arg
> +dimensions: 1
> +global_size: 10 0 0
> +
> +
> +arg_out: 0 buffer int4[10] \
> + 11440 1348 29304 16698 \
> + 47975 3626 30850 13224 \
> + 8235 30495 31995 1455 \
> + 16048 40512 33992 7028 \
> + 9450 5356 21330 23130 \
> + 21120 35186 52896 49968 \
> + 34083 28520 0 0 \
> + 12384 41492 4420 17880 \
> + 37310 19320 37518 13175 \
> + 23852 16014 22734 24284 \
> +
> +
> +arg_in: 1 buffer int4[10] \
> + 0 13 76 46 \
> + 63 76 100 36 \
> + 27 92 53 46 \
> + 53 50 96 75 \
> + 99 41 14 57 \
> + 35 45 81 94 \
> + 80 71 74 1 \
> + 78 73 32 42 \
> + 60 17 83 15 \
> + 13 53 31 59
> +
> +arg_in: 2 buffer int4[10] \
> + 53 22 4 68 \
> + 24 99 72 76 \
> + 95 5 76 77 \
> + 56 89 63 85 \
> + 25 49 46 97 \
> + 65 21 68 91 \
> + 89 53 46 6 \
> + 68 68 20 84 \
> + 99 25 23 10 \
> + 52 43 26 37
> +
> +arg_in: 3 buffer int4[10] \
> + 68 94 38 52 \
> + 65 7 63 89 \
> + 83 12 1 69 \
> + 16 21 72 13 \
> + 12 20 32 63 \
> + 25 86 47 51 \
> + 72 49 67 68 \
> + 71 83 9 8 \
> + 22 64 70 80 \
> + 39 45 48 39
> +
> +arg_in: 4 buffer int4[10] \
> + 83 3 5 53 \
> + 27 44 77 48 \
> + 87 63 74 73 \
> + 9 27 0 41 \
> + 12 65 62 81 \
> + 60 82 76 46 \
> + 20 92 87 89 \
> + 77 63 21 21 \
> + 70 76 67 63 \
> + 28 7 37 25
> +
> +arg_in: 5 buffer int4[10] \
> + 67 0 38 6 \
> + 24 27 36 16 \
> + 100 89 23 30 \
> + 2 71 94 24 \
> + 25 48 39 20 \
> + 96 63 44 83 \
> + 54 14 45 99 \
> + 8 39 96 95 \
> + 5 60 22 32 \
> + 67 68 51 73
> +
> +arg_in: 6 buffer int4[10] \
> + 42 69 59 93 \
> + 49 90 91 6 \
> + 35 51 59 85 \
> + 18 32 89 65 \
> + 2 91 43 14 \
> + 69 70 99 96 \
> + 21 45 31 51 \
> + 39 27 69 28 \
> + 70 11 77 53 \
> + 72 95 46 94
> +
> +arg_in: 7 buffer int4[10] \
> + 85 53 9 66 \
> + 91 50 52 32 \
> + 41 84 27 41 \
> + 15 68 38 39 \
> + 95 41 13 89 \
> + 85 29 54 51 \
> + 89 44 47 81 \
> + 78 79 42 28 \
> + 55 59 33 71 \
> + 32 46 52 66
> +
> +arg_in: 8 buffer int4[10] \
> + 42 70 91 76 \
> + 99 49 26 9 \
> + 54 47 29 18 \
> + 50 14 59 85 \
> + 9 16 7 36 \
> + 10 41 58 88 \
> + 36 21 100 15 \
> + 19 1 19 99 \
> + 14 16 49 86 \
> + 40 61 99 15
> +
> +arg_in: 9 buffer int4[10] \
> + 26 4 74 33 \
> + 95 7 50 38 \
> + 15 57 81 3 \
> + 59 96 56 14 \
> + 25 13 79 45 \
> + 44 73 87 72 \
> + 63 62 0 0 \
> + 24 82 13 40 \
> + 82 56 74 31 \
> + 67 34 54 52
> +
> +!*/
> +
> +// The inline asm is necessary to defeat interprocedural sparse
> +// conditional constant propagation eliminating some of the trivial
> +// calls.
> +#ifdef __AMDGCN__
> +#define USE_ASM 1
> +#endif
> +
> +#define NOINLINE __attribute__((noinline))
> +
> +NOINLINE
> +void void_func_void(void)
> +{
> +#if USE_ASM
> + __asm("");
> +#endif
> +}
> +
> +kernel void call_void_func_void(__global int* ret)
> +{
> + void_func_void();
> + *ret = 12345;
> +}
> +
> +NOINLINE
> +int i32_func_void(void)
> +{
> + int ret;
> +#if USE_ASM
> + __asm("v_mov_b32 %0, 0x12345" : "=v"(ret));
> +#else
> + ret = 0x12345;
> +#endif
> +
> + return ret;
> +}
> +
> +kernel void call_i32_func_void(__global int* ret)
> +{
> + *ret = i32_func_void();
> +}
> +
> +NOINLINE
> +long i64_func_void(void)
> +{
> + long ret;
> +#if USE_ASM
> + __asm("v_lshlrev_b64 %0, 44, 1" : "=v"(ret));
> +#else
> + ret = 1ull << 44;
> +#endif
> + return ret;
> +}
> +
> +kernel void call_i64_func_void(__global long* ret)
> +{
> + *ret = i64_func_void();
> +}
> +
> +
> +NOINLINE
> +int i32_func_void_callee_stack(void)
> +{
> + int ret;
> +#if USE_ASM
> + __asm("v_mov_b32 %0, 0x64" : "=v"(ret));
> +#else
> + ret = 0x64;
> +#endif
> +
> + volatile int alloca[20];
> +
> + for (int i = 0; i < 20; ++i)
> + {
> + alloca[i] = i;
> + }
> +
> + for (int i = 0; i < 20; ++i)
> + {
> + ret += alloca[i];
> + }
> +
> + return ret;
> +}
> +
> +kernel void call_i32_func_void_callee_stack(__global int* ret)
> +{
> + volatile int alloca[10];
> +
> + for (int i = 0; i < 10; ++i)
> + {
> + alloca[i] = 0xffff;
> + }
> +
> +
> + *ret = i32_func_void_callee_stack();
> +}
> +
> +NOINLINE
> +int i32_func_p0i32_i32_caller_stack(volatile int* stack, int n)
> +{
> + int ret;
> +#if USE_ASM
> + __asm("v_mov_b32 %0, 0x64" : "=v"(ret));
> +#else
> + ret = 0x64;
> +#endif
> +
> + for (int i = 0; i < n; ++i)
> + {
> + ret += stack[i];
> + }
> +
> + return ret;
> +}
> +
> +kernel void call_i32_func_p0i32_i32_caller_stack(__global int* ret)
> +{
> + volatile int alloca[10];
> +
> + for (int i = 0; i < 10; ++i)
> + {
> + alloca[i] = 3 + i;
> + }
> +
> + *ret = i32_func_p0i32_i32_caller_stack(alloca, 10);
> +}
> +
> +NOINLINE
> +int i32_func_p0i32_i32_indirect_stack(volatile int* stack, int n)
> +{
> + int ret;
> +#if USE_ASM
> + __asm("v_mov_b32 %0, 0x64" : "=v"(ret));
> +#else
> + ret = 0x64;
> +#endif
> + for (int i = 0; i < n; ++i)
> + {
> + ret += stack[i];
> + }
> +
> + return ret;
> +}
> +
> +// Access a stack object in the parent kernel's frame.
> +NOINLINE
> +int i32_func_p0i32_i32_pass_kernel_stack(volatile int* stack, int n)
> +{
> + int ret;
> +#if USE_ASM
> + __asm("v_mov_b32 %0, 0x42" : "=v"(ret));
> +#else
> + ret = 0x42;
> +#endif
> +
> + volatile int local_object[10];
> + for (int i = 0; i < 10; ++i)
> + local_object[i] = -1;
> +
> + ret += i32_func_p0i32_i32_indirect_stack(stack, n);
> +
> + return ret;
> +}
> +
> +kernel void call_i32_func_p0i32_i32_indirect_kernel_stack(volatile __global int* ret)
> +{
> + volatile int alloca[10];
> +
> + for (int i = 0; i < 10; ++i)
> + {
> + alloca[i] = 3 + i;
> + }
> +
> + *ret = i32_func_p0i32_i32_pass_kernel_stack(alloca, 10);
> +}
> +
> +// Access a stack object in a parent non-kernel function's stack frame.
> +NOINLINE
> +int i32_func_void_pass_function_stack()
> +{
> + int ret;
> +#if USE_ASM
> + __asm("v_mov_b32 %0, 0x42" : "=v"(ret));
> +#else
> + ret = 0x42;
> +#endif
> +
> + volatile int local_object[10];
> + for (int i = 0; i < 10; ++i)
> + local_object[i] = 8 + i;
> +
> + ret += i32_func_p0i32_i32_indirect_stack(local_object, 10);
> + return ret;
> +}
> +
> +kernel void call_i32_func_p0i32_i32_indirect_function_stack(__global int* ret)
> +{
> + *ret = i32_func_void_pass_function_stack();
> +}
> +
> +NOINLINE
> +int4 v4i32_func_v4i32_v4i32_stack(int4 arg0, int4 arg1)
> +{
> + // Force stack usage.
> + volatile int4 args[8] = { arg0, arg1 };
> +
> + int4 total = 0;
> + for (int i = 0; i < 8; ++i)
> + {
> + total += args[i];
> + }
> +
> + return total;
> +}
> +
> +// Make sure using stack in a callee function from a callee function
> +// doesn't corrupt caller's stack objects.
> +NOINLINE
> +int4 nested_stack_usage_v4i32_func_v4i32_v4i32(int4 arg0, int4 arg1)
> +{
> + volatile int stack_object[4];
> + for (int i = 0; i < 4; ++i) {
> + const int test_val = 0x04030200 | i;
> + stack_object[i] = test_val;
> + }
> +
> + arg0 *= 2;
> +
> + int4 result = v4i32_func_v4i32_v4i32_stack(arg0, arg1);
> +
> + // Check for stack corruption
> + for (int i = 0; i < 4; ++i)
> + {
> + const int test_val = 0x04030200 | i;
> + if (stack_object[i] != test_val)
> + result = -1;
> + }
> +
> + return result;
> +}
> +
> +kernel void kernel_call_nested_stack_usage(global int4* output,
> + global int4* input0,
> + global int4* input1)
> +{
> + int id = get_global_id(0);
> + output[id] = nested_stack_usage_v4i32_func_v4i32_v4i32(
> + input0[id],
> + input1[id]);
> +}
> +
> +NOINLINE
> +int func_div_add(int x, int y)
> +{
> + return x / y + 4;
> +}
> +
> +NOINLINE
> +int call_i32_func_i32_i32(int x, int y, volatile int* ptr)
> +{
> + int tmp = func_div_add(x, y) >> 2;
> + return tmp + *ptr;
> +}
> +
> +kernel void kernel_nested_calls(global int* output,
> + global int* input0,
> + global int* input1)
> +{
> + int id = get_global_id(0);
> + volatile int zero = 0;
> + output[id] = call_i32_func_i32_i32(input0[id], input1[id], &zero);
> +}
> +
> +NOINLINE
> +int4 v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32(
> + int4 arg0, int4 arg1, int4 arg2, int4 arg3,
> + int4 arg4, int4 arg5, int4 arg6, int4 arg7,
> + int4 arg8)
> +{
> + // Try to make sure we can't clobber the incoming stack arguments
> + // with local stack objects.
> + volatile int4 args[8] = { arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 };
> + volatile int4 last_arg = arg8;
> +
> + int4 total = 0;
> + for (int i = 0; i < 8; ++i)
> + {
> + total += args[i];
> + }
> +
> + return total * last_arg;
> +}
> +
> + // Test argument passed on stack, but doesn't use byval.
> +NOINLINE
> +int4 stack_arg_v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32(
> + int4 arg0, int4 arg1, int4 arg2, int4 arg3,
> + int4 arg4, int4 arg5, int4 arg6, int4 arg7,
> + int4 arg8)
> +{
> + volatile int stack_object[8];
> + for (int i = 0; i < 8; ++i) {
> + const int test_val = 0x04030200 | i;
> + stack_object[i] = test_val;
> + }
> +
> + arg0 *= 2;
> +
> + int4 result = v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32(
> + arg0, arg1, arg2, arg3, arg4,
> + arg5, arg6, arg7, arg8);
> +
> + // Check for stack corruption.
> + for (int i = 0; i < 8; ++i)
> + {
> + const int test_val = 0x04030200 | i;
> + if (stack_object[i] != test_val)
> + result = -1;
> + }
> +
> + return result;
> +}
> +
> +kernel void kernel_call_stack_arg(global int4* output,
> + global int4* input0,
> + global int4* input1,
> + global int4* input2,
> + global int4* input3,
> + global int4* input4,
> + global int4* input5,
> + global int4* input6,
> + global int4* input7,
> + global int4* input8)
> +{
> + int id = get_global_id(0);
> +
> + volatile int stack_object[8];
> + for (int i = 0; i < 8; ++i) {
> + const int test_val = 0x05060700 | i;
> + stack_object[i] = test_val;
> + }
> +
> + output[id] = stack_arg_v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32(
> + input0[id],
> + input1[id],
> + input2[id],
> + input3[id],
> + input4[id],
> + input5[id],
> + input6[id],
> + input7[id],
> + input8[id]);
> +
> + // Check for stack corruption.
> + for (int i = 0; i < 8; ++i)
> + {
> + const int test_val = 0x05060700 | i;
> + if (stack_object[i] != test_val)
> + output[id] = -1;
> + }
> +
> +}
> diff --git a/tests/cl/program/execute/tail-calls.cl b/tests/cl/program/execute/tail-calls.cl
> new file mode 100644
> index 000000000..cf33373cc
> --- /dev/null
> +++ b/tests/cl/program/execute/tail-calls.cl
> @@ -0,0 +1,305 @@
> +/*!
> +
> +[config]
> +name: tail calls
> +clc_version_min: 10
> +dimensions: 1
> +
> +[test]
> +name: Basic tail call
> +kernel_name: kernel_call_tailcall
> +global_size: 4 0 0
> +
> +arg_out: 0 buffer int[4] \
> + 4 11 107 -12
> +
> +arg_in: 1 buffer int[4] \
> + 0 100 1234 -912
> +
> +arg_in: 2 buffer int[4] \
> + 1 4 2 45
> +
> +[test]
> +name: Tail call with more arguments than caller
> +kernel_name: kernel_call_tailcall_extra_arg
> +global_size: 4 0 0
> +
> +arg_out: 0 buffer int[4] \
> + 2 112 1340 -882
> +
> +arg_in: 1 buffer int[4] \
> + 0 100 1234 -912
> +
> +arg_in: 2 buffer int[4] \
> + 1 4 2 45
> +
> +[test]
> +name: Tail call with fewer arguments than acller
> +kernel_name: kernel_call_tailcall_fewer_args
> +global_size: 4 0 0
> +
> +arg_out: 0 buffer int[4] \
> + 4 8 81 -10
> +
> +arg_in: 1 buffer int[4] \
> + 0 100 1234 -912
> +
> +arg_in: 2 buffer int[4] \
> + 1 4 2 45
> +
> +arg_in: 3 buffer int[4] \
> + 3 8 4 9
> +
> +[test]
> +name: Tail call with stack passed argument
> +kernel_name: kernel_call_tailcall_stack_passed_args
> +global_size: 10 0 0
> +
> +arg_out: 0 buffer int4[10] \
> + 11440 8762 10296 13156 \
> + 19649 31311 18081 24745 \
> + 10476 11772 17766 11070 \
> + 22165 18005 28665 35945 \
> + 624 938 768 990 \
> + 30618 28791 30240 31815 \
> + 49851 47676 46806 47676 \
> + 4400 4272 3392 2632 \
> + 10582 8712 8514 7854 \
> + 19737 21199 23865 18533 \
> +
> +
> +arg_in: 1 buffer int4[10] \
> + 0 13 76 46 \
> + 4 74 33 63 \
> + 26 9 95 7 \
> + 41 54 47 29 \
> + 15 68 38 39 \
> + 91 43 14 95 \
> + 44 83 69 70 \
> + 89 54 14 45 \
> + 77 63 21 21 \
> + 64 70 80 70
> +
> +arg_in: 2 buffer int4[10] \
> + 53 22 4 68 \
> + 76 100 36 24 \
> + 50 38 27 92 \
> + 18 15 57 81 \
> + 50 14 59 85 \
> + 41 13 89 9 \
> + 99 96 85 29 \
> + 99 21 45 31 \
> + 8 39 96 95 \
> + 76 67 63 5
> +
> +arg_in: 3 buffer int4[10] \
> + 68 94 38 52 \
> + 99 72 76 65 \
> + 53 46 95 5 \
> + 3 53 50 96 \
> + 59 96 56 14 \
> + 16 7 36 25 \
> + 54 51 10 41 \
> + 51 89 44 47 \
> + 39 27 69 28 \
> + 60 22 32 70
> +
> +arg_in: 4 buffer int4[10] \
> + 83 3 5 53 \
> + 7 63 89 27 \
> + 76 77 83 12 \
> + 75 56 89 63 \
> + 99 41 14 57 \
> + 13 79 45 35 \
> + 58 88 44 73 \
> + 81 36 21 100 \
> + 78 79 42 28 \
> + 11 77 53 55
> +
> +arg_in: 5 buffer int4[10] \
> + 67 0 38 6 \
> + 44 77 48 24 \
> + 1 69 87 63 \
> + 85 16 21 72 \
> + 25 49 46 97 \
> + 45 81 94 65 \
> + 87 72 80 71 \
> + 15 63 62 0 \
> + 19 1 19 99 \
> + 59 33 71 14
> +
> +arg_in: 6 buffer int4[10] \
> + 42 69 59 93 \
> + 27 36 16 49 \
> + 74 73 100 89 \
> + 13 9 27 0 \
> + 12 20 32 63 \
> + 21 68 91 25 \
> + 74 1 89 53 \
> + 0 78 73 32 \
> + 24 82 13 40 \
> + 16 49 86 82
> +
> +arg_in: 7 buffer int4[10] \
> + 85 53 9 66 \
> + 90 91 6 91 \
> + 23 30 35 51 \
> + 41 2 71 94 \
> + 12 65 62 81 \
> + 86 47 51 60 \
> + 46 6 72 49 \
> + 42 68 68 20 \
> + 60 17 83 15 \
> + 56 74 31 13
> +
> +arg_in: 8 buffer int4[10] \
> + 42 70 91 76 \
> + 50 52 32 99 \
> + 59 85 41 84 \
> + 24 18 32 89 \
> + 25 48 39 20 \
> + 82 76 46 96 \
> + 67 68 20 92 \
> + 84 71 83 9 \
> + 99 25 23 10 \
> + 53 31 59 52
> +
> +arg_in: 9 buffer int[10] \
> + 26 \
> + 49 \
> + 27 \
> + 65 \
> + 2 \
> + 63 \
> + 87 \
> + 8 \
> + 22 \
> + 43
> +
> +!*/
> +
> +#define NOINLINE __attribute__((noinline))
> +
> +NOINLINE
> +int i32_func_i32_i32(int x, int y)
> +{
> + return x / y + 4;
> +}
> +
> +NOINLINE
> +int i32_func_i32_i32_i32(int x, int y, int z)
> +{
> + return x / y + z;
> +}
> +
> +// Test a basic tail call
> +NOINLINE
> +int tailcall_i32_func_i32_i32(int x, int y)
> +{
> + x += 5;
> + y += 10;
> + return i32_func_i32_i32(x, y);
> +}
> +
> +// Test a basic tail call with more arguments in the callee than
> +// caller.
> +NOINLINE
> +int tailcall_i32_func_i32_i32_extra_arg(int x, int y)
> +{
> + int z = x + y + 1;
> + x += 5;
> + y += 10;
> + return i32_func_i32_i32_i32(x, y, z);
> +}
> +
> +// Test a basic tail call with fewere arguments in the callee than
> +// caller.
> +NOINLINE
> +int tailcall_i32_func_i32_i32_i32_fewer_args(int x, int y, int z)
> +{
> + x += 5;
> + y += 10;
> + return i32_func_i32_i32(x, y + z);
> +}
> +
> +kernel void kernel_call_tailcall(global int* output,
> + global int* input0,
> + global int* input1)
> +{
> + int id = get_global_id(0);
> + output[id] = tailcall_i32_func_i32_i32(input0[id], input1[id]);
> +}
> +
> +kernel void kernel_call_tailcall_extra_arg(global int* output,
> + global int* input0,
> + global int* input1)
> +{
> + int id = get_global_id(0);
> + output[id] = tailcall_i32_func_i32_i32_extra_arg(input0[id], input1[id]);
> +}
> +
> +kernel void kernel_call_tailcall_fewer_args(global int* output,
> + global int* input0,
> + global int* input1,
> + global int* input2)
> +{
> + int id = get_global_id(0);
> + output[id] = tailcall_i32_func_i32_i32_i32_fewer_args(input0[id], input1[id], input2[id]);
> +}
> +
> +NOINLINE
> +int4 v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_i32(
> + int4 arg0, int4 arg1, int4 arg2, int4 arg3,
> + int4 arg4, int4 arg5, int4 arg6, int4 arg7,
> + int arg8)
> +{
> + // Try to make sure we can't clobber the incoming stack arguments
> + // with local stack objects.
> + volatile int4 args[8] = { arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 };
> + volatile int scalar_arg = arg8;
> +
> + int4 total = 0;
> + for (int i = 0; i < 8; ++i)
> + {
> + total += args[i];
> + }
> +
> + return total * scalar_arg;
> +}
> +
> +// Test a basic tail call
> +NOINLINE
> +int4 tailcall_v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_i32(
> + int4 arg0, int4 arg1, int4 arg2, int4 arg3,
> + int4 arg4, int4 arg5, int4 arg6, int4 arg7,
> + int arg8)
> +{
> + arg0 *= 2;
> + return v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_i32(
> + arg0, arg1, arg2, arg3, arg4,
> + arg5, arg6, arg7, arg8);
> +}
> +
> +kernel void kernel_call_tailcall_stack_passed_args(global int4* output,
> + global int4* input0,
> + global int4* input1,
> + global int4* input2,
> + global int4* input3,
> + global int4* input4,
> + global int4* input5,
> + global int4* input6,
> + global int4* input7,
> + global int* input8)
> +{
> + int id = get_global_id(0);
> + output[id] = tailcall_v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_i32(
> + input0[id],
> + input1[id],
> + input2[id],
> + input3[id],
> + input4[id],
> + input5[id],
> + input6[id],
> + input7[id],
> + input8[id]);
> +}
--
Jan Vesely <jan.vesely at rutgers.edu>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part
URL: <https://lists.freedesktop.org/archives/piglit/attachments/20170919/5f79b516/attachment-0001.sig>
More information about the Piglit
mailing list