[Piglit] [PATCH] cl: Add tests for function calls

Jan Vesely jan.vesely at rutgers.edu
Tue Sep 19 05:17:31 UTC 2017


On Mon, 2017-09-18 at 18:53 -0700, Matt Arsenault wrote:
> Passes on ROCm, I haven't tried clover recently. Last
> time I did it errored because the AsmParser wasn't properly
> initialized.
> 
> v2: Fix non-unique test names, Wrap noinline in unguarded macro,
> use prettier test names, use device_regex (effectively restricting to ROCm)

thanks.
Reviewed-by: Jan Vesely <jan.vesely at rutgers.edu>
I want to run this on my carrizo machine before pushing, should be
~wednesday.

Jan

> ---
>  tests/cl/program/execute/call-clobbers-amdgcn.cl |  68 +++
>  tests/cl/program/execute/calls-struct.cl         | 179 +++++++
>  tests/cl/program/execute/calls-workitem-id.cl    |  77 +++
>  tests/cl/program/execute/calls.cl                | 607 +++++++++++++++++++++++
>  tests/cl/program/execute/tail-calls.cl           | 305 ++++++++++++
>  5 files changed, 1236 insertions(+)
>  create mode 100644 tests/cl/program/execute/call-clobbers-amdgcn.cl
>  create mode 100644 tests/cl/program/execute/calls-struct.cl
>  create mode 100644 tests/cl/program/execute/calls-workitem-id.cl
>  create mode 100644 tests/cl/program/execute/calls.cl
>  create mode 100644 tests/cl/program/execute/tail-calls.cl
> 
> diff --git a/tests/cl/program/execute/call-clobbers-amdgcn.cl b/tests/cl/program/execute/call-clobbers-amdgcn.cl
> new file mode 100644
> index 000000000..400771795
> --- /dev/null
> +++ b/tests/cl/program/execute/call-clobbers-amdgcn.cl
> @@ -0,0 +1,68 @@
> +/*!
> +
> +[config]
> +name: amdgcn call clobbers
> +clc_version_min: 10
> +device_regex: gfx[\d]*
> +
> +[test]
> +name: callee saved sgpr
> +kernel_name: call_clobber_s40
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 0xabcd1234
> +
> +[test]
> +name: callee saved vgpr
> +kernel_name: call_clobber_v40
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 0xabcd1234
> +
> +!*/
> +
> +#ifndef __AMDGCN__
> +#error This test is only for amdgcn
> +#endif
> +
> +__attribute__((noinline))
> +void clobber_s40()
> +{
> +    __asm volatile("s_mov_b32 s40, 0xdead" : : : "s40");
> +}
> +
> +kernel void call_clobber_s40(__global int* ret)
> +{
> +    __asm volatile("s_mov_b32 s40, 0xabcd1234" : : : "s40");
> +
> +    clobber_s40();
> +
> +    int tmp;
> +
> +    __asm volatile("v_mov_b32 %0, s40"
> +                  : "=v"(tmp)
> +                  :
> +                  : "s40");
> +    *ret = tmp;
> +}
> +
> +__attribute__((noinline))
> +void clobber_v40()
> +{
> +    __asm volatile("v_mov_b32 v40, 0xdead" : : : "v40");
> +}
> +
> +kernel void call_clobber_v40(__global int* ret)
> +{
> +    __asm volatile("v_mov_b32 v40, 0xabcd1234" : : : "v40");
> +
> +    clobber_v40();
> +
> +    int tmp;
> +    __asm volatile("v_mov_b32 %0, v40"
> +                  : "=v"(tmp)
> +                  :
> +                  : "v40");
> +    *ret = tmp;
> +}
> +
> diff --git a/tests/cl/program/execute/calls-struct.cl b/tests/cl/program/execute/calls-struct.cl
> new file mode 100644
> index 000000000..04f769dac
> --- /dev/null
> +++ b/tests/cl/program/execute/calls-struct.cl
> @@ -0,0 +1,179 @@
> +/*!
> +
> +[config]
> +name: calls with structs
> +clc_version_min: 10
> +
> +[test]
> +name: byval struct
> +kernel_name: call_i32_func_byval_Char_IntArray
> +dimensions: 1
> +global_size: 16 0 0
> +
> +arg_out: 0 buffer int[16]        \
> + 1021 1022 1023 1024 1025 1026 1027 1028 \
> + 1029 1030 1031 1032 1033 1034 1035 1036
> +
> +arg_out: 1 buffer int[16] \
> +  14   14   14   14 \
> +  14   14   14   14 \
> +  14   14   14   14 \
> +  14   14   14   14 \
> +
> +arg_in: 2 buffer int[16] \
> + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
> +
> +
> +[test]
> +name: sret struct
> +kernel_name: call_sret_Char_IntArray_func
> +dimensions: 1
> +global_size: 16 0 0
> +
> +arg_out: 0 buffer int[16]        \
> + 921 922 923 924 925 926 927 928 \
> + 929 930 931 932 933 934 935 936
> +
> +arg_in: 1 buffer int[16] \
> + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
> +
> +
> +[test]
> +name: byval struct and sret struct
> +kernel_name: call_sret_Char_IntArray_func_byval_Char_IntArray
> +dimensions: 1
> +global_size: 16 0 0
> +
> +arg_out: 0 buffer int[16]        \
> +  86 87 88 89   \
> +  90 91 92 93   \
> +  94 95 96 97   \
> +  98 99 100 101
> +
> +arg_out: 1 buffer int[16]        \
> +  134  135  136  137  \
> +  138  139  140  141  \
> +  142  143  144  145  \
> +  146  147  148  149
> +
> +arg_in: 2 buffer int[16] \
> + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
> +
> +!*/
> +
> +#define NOINLINE __attribute__((noinline))
> +
> +typedef struct ByVal_Char_IntArray {
> +    char c;
> +    int i[4];
> +} ByVal_Char_IntArray;
> +
> +NOINLINE
> +int i32_func_byval_Char_IntArray(ByVal_Char_IntArray st)
> +{
> +    st.i[0] += 100;
> +
> +    int sum = 0;
> +    for (int i = 0; i < 4; ++i)
> +    {
> +        sum += st.i[i];
> +    }
> +
> +    sum += st.c;
> +    return sum;
> +}
> +
> +kernel void call_i32_func_byval_Char_IntArray(global int* out0,
> +                                              global int* out1,
> +                                              global int* input)
> +{
> +    ByVal_Char_IntArray st;
> +    st.c = 15;
> +
> +    int id = get_global_id(0);
> +
> +    int val = input[id];
> +    st.i[0] = 14;
> +    st.i[1] = -8;
> +    st.i[2] = val;
> +    st.i[3] = 900;
> +
> +    int result = i32_func_byval_Char_IntArray(st);
> +    out0[id] = result;
> +    out1[id] = st.i[0];
> +}
> +
> +NOINLINE
> +ByVal_Char_IntArray sret_Char_IntArray_func(global int* input, int id)
> +{
> +    ByVal_Char_IntArray st;
> +    st.c = 15;
> +
> +    int val = input[id];
> +    st.i[0] = 14;
> +    st.i[1] = -8;
> +    st.i[2] = val;
> +    st.i[3] = 900;
> +
> +    return st;
> +}
> +
> +kernel void call_sret_Char_IntArray_func(global int* output, global int* input)
> +{
> +    int id = get_global_id(0);
> +    ByVal_Char_IntArray st = sret_Char_IntArray_func(input, id);
> +
> +    int sum = 0;
> +    for (int i = 0; i < 4; ++i)
> +    {
> +        sum += st.i[i];
> +    }
> +
> +    sum += st.c;
> +    output[id] = sum;
> +}
> +
> +NOINLINE
> +ByVal_Char_IntArray sret_Char_IntArray_func_byval_Char_IntArray(ByVal_Char_IntArray st)
> +{
> +    st.c += 15;
> +
> +    st.i[0] += 14;
> +    st.i[1] -= 8;
> +    st.i[2] += 9;
> +    st.i[3] += 18;
> +
> +    return st;
> +}
> +
> +kernel void call_sret_Char_IntArray_func_byval_Char_IntArray(global int* output0,
> +                                                             global int* output1,
> +                                                             global int* input)
> +{
> +    int id = get_global_id(0);
> +
> +    volatile ByVal_Char_IntArray st0;
> +    st0.c = -20;
> +
> +    int val = input[id];
> +    st0.i[0] = 14;
> +    st0.i[1] = -8;
> +    st0.i[2] = val;
> +    st0.i[3] = 100;
> +
> +    ByVal_Char_IntArray st1 = sret_Char_IntArray_func_byval_Char_IntArray(st0);
> +
> +    int sum0 = 0;
> +    int sum1 = 0;
> +    for (int i = 0; i < 4; ++i)
> +    {
> +        sum0 += st0.i[i];
> +        sum1 += st1.i[i];
> +    }
> +
> +    sum0 += st0.c;
> +    sum1 += st1.c;
> +
> +    output0[id] = sum0;
> +    output1[id] = sum1;
> +}
> diff --git a/tests/cl/program/execute/calls-workitem-id.cl b/tests/cl/program/execute/calls-workitem-id.cl
> new file mode 100644
> index 000000000..7edfad7e9
> --- /dev/null
> +++ b/tests/cl/program/execute/calls-workitem-id.cl
> @@ -0,0 +1,77 @@
> +/*!
> +
> +[config]
> +name: calls workitem IDs
> +clc_version_min: 10
> +
> +[test]
> +name: Callee function use get_global_id(0)
> +kernel_name: kernel_call_pass_get_global_id_0
> +dimensions: 1
> +global_size: 64 0 0
> +arg_out: 0 buffer uint[64] \
> +  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 \
> + 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 \
> + 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 \
> + 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
> +
> +[test]
> +name: Callee function use get_global_id 0..2
> +kernel_name: kernel_call_pass_get_global_id_012
> +dimensions: 3
> +global_size: 8 4 2
> +arg_out: 0 buffer uint[64] \
> +  0  1  2  3  4  5  6  7  0  1  2  3  4  5  6  7 \
> +  0  1  2  3  4  5  6  7  0  1  2  3  4  5  6  7 \
> +  0  1  2  3  4  5  6  7  0  1  2  3  4  5  6  7 \
> +  0  1  2  3  4  5  6  7  0  1  2  3  4  5  6  7
> +
> +arg_out: 1 buffer uint[64] \
> +  0  0  0  0  0  0  0  0  1  1  1  1  1  1  1  1 \
> +  2  2  2  2  2  2  2  2  3  3  3  3  3  3  3  3 \
> +  0  0  0  0  0  0  0  0  1  1  1  1  1  1  1  1 \
> +  2  2  2  2  2  2  2  2  3  3  3  3  3  3  3  3
> +
> +arg_out: 2 buffer uint[64] \
> +  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 \
> +  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 \
> +  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 \
> +  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
> +
> +!*/
> +
> +#define NOINLINE __attribute__((noinline))
> +
> +NOINLINE
> +void func_get_global_id_0(volatile global uint* out)
> +{
> +    uint gid = get_global_id(0);
> +    out[gid] = gid;
> +}
> +
> +kernel void kernel_call_pass_get_global_id_0(global uint *out)
> +{
> +    func_get_global_id_0(out);
> +}
> +
> +NOINLINE
> +void func_get_global_id_012(volatile global uint* out0,
> +                            volatile global uint* out1,
> +                            volatile global uint* out2)
> +{
> +    uint id0 = get_global_id(0);
> +    uint id1 = get_global_id(1);
> +    uint id2 = get_global_id(2);
> +    uint flat_id = (id2 * get_global_size(1) + id1) * get_global_size(0) + id0;
> +
> +    out0[flat_id] = id0;
> +    out1[flat_id] = id1;
> +    out2[flat_id] = id2;
> +}
> +
> +kernel void kernel_call_pass_get_global_id_012(global uint *out0,
> +                                               global uint *out1,
> +                                               global uint *out2)
> +{
> +    func_get_global_id_012(out0, out1, out2);
> +}
> diff --git a/tests/cl/program/execute/calls.cl b/tests/cl/program/execute/calls.cl
> new file mode 100644
> index 000000000..f4f55be31
> --- /dev/null
> +++ b/tests/cl/program/execute/calls.cl
> @@ -0,0 +1,607 @@
> +/*!
> +
> +[config]
> +name: calls
> +clc_version_min: 10
> +
> +[test]
> +name: Call void_func_void
> +kernel_name: call_void_func_void
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 12345
> +
> +[test]
> +name: Call i32_func_void
> +kernel_name: call_i32_func_void
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 0x12345
> +
> +[test]
> +name: Call i64_func_void
> +kernel_name: call_i64_func_void
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer long[1] 0x100000000000
> +
> +
> +[test]
> +name: Call call_i32_func_void_callee_stack
> +kernel_name: call_i32_func_void_callee_stack
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 290
> +
> +[test]
> +name: Call call_i32_func_p0i32_i32_caller_stack
> +kernel_name: call_i32_func_p0i32_i32_caller_stack
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 175
> +
> +[test]
> +name: Call call_i32_func_p0i32_i32_indirect_kernel_stack
> +kernel_name: call_i32_func_p0i32_i32_indirect_kernel_stack
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 241
> +
> +[test]
> +name: Call call_i32_func_p0i32_i32_indirect_function_stack
> +kernel_name: call_i32_func_p0i32_i32_indirect_function_stack
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 291
> +
> +[test]
> +name: callee stack corruption
> +kernel_name: kernel_call_nested_stack_usage
> +dimensions: 1
> +global_size: 10 0 0
> +
> +arg_out: 0 buffer int4[10] \
> +    53    48   156   160  \
> +    84   248   102   150  \
> +   102    56   217   106  \
> +   100   123   151   139  \
> +    80   150   135   163  \
> +   223    99   117   199  \
> +   187   262   223   169  \
> +   277   129    73   121  \
> +   162   165   138   137  \
> +   204   207   223   145  \
> +
> +
> +arg_in: 1 buffer int4[10] \
> +     0    13    76    46  \
> +     4    74    33    63  \
> +    26     9    95     7  \
> +    41    54    47    29  \
> +    15    68    38    39  \
> +    91    43    14    95  \
> +    44    83    69    70  \
> +    89    54    14    45  \
> +    77    63    21    21  \
> +    64    70    80    70
> +
> +arg_in: 2 buffer int4[10] \
> +    53    22     4    68  \
> +    76   100    36    24  \
> +    50    38    27    92  \
> +    18    15    57    81  \
> +    50    14    59    85  \
> +    41    13    89     9  \
> +    99    96    85    29  \
> +    99    21    45    31  \
> +     8    39    96    95  \
> +    76    67    63     5
> +
> +[test]
> +name: nested calls
> +kernel_name: kernel_nested_calls
> +dimensions: 1
> +global_size: 4 0 0
> +
> +arg_out: 0 buffer int[4] \
> +  1    7   155     -4
> +
> +arg_in: 1 buffer int[4] \
> +  0   100  1234  -912
> +
> +arg_in: 2 buffer int[4] \
> +  1    4      2    45
> +
> +
> +[test]
> +name: Kernel call stack argument
> +kernel_name: kernel_call_stack_arg
> +dimensions: 1
> +global_size: 10 0 0
> +
> +
> +arg_out: 0 buffer int4[10] \
> + 11440  1348 29304 16698  \
> + 47975  3626 30850 13224  \
> +  8235 30495 31995  1455  \
> + 16048 40512 33992  7028  \
> +  9450  5356 21330 23130  \
> + 21120 35186 52896 49968  \
> + 34083 28520     0     0  \
> + 12384 41492  4420 17880  \
> + 37310 19320 37518 13175  \
> + 23852 16014 22734 24284  \
> +
> +
> +arg_in: 1 buffer int4[10] \
> +     0    13    76    46  \
> +    63    76   100    36  \
> +    27    92    53    46  \
> +    53    50    96    75  \
> +    99    41    14    57  \
> +    35    45    81    94  \
> +    80    71    74     1  \
> +    78    73    32    42  \
> +    60    17    83    15  \
> +    13    53    31    59
> +
> +arg_in: 2 buffer int4[10] \
> +    53    22     4    68  \
> +    24    99    72    76  \
> +    95     5    76    77  \
> +    56    89    63    85  \
> +    25    49    46    97  \
> +    65    21    68    91  \
> +    89    53    46     6  \
> +    68    68    20    84  \
> +    99    25    23    10  \
> +    52    43    26    37
> +
> +arg_in: 3 buffer int4[10] \
> +    68    94    38    52  \
> +    65     7    63    89  \
> +    83    12     1    69  \
> +    16    21    72    13  \
> +    12    20    32    63  \
> +    25    86    47    51  \
> +    72    49    67    68  \
> +    71    83     9     8  \
> +    22    64    70    80  \
> +    39    45    48    39
> +
> +arg_in: 4 buffer int4[10] \
> +    83     3     5    53  \
> +    27    44    77    48  \
> +    87    63    74    73  \
> +     9    27     0    41  \
> +    12    65    62    81  \
> +    60    82    76    46  \
> +    20    92    87    89  \
> +    77    63    21    21  \
> +    70    76    67    63  \
> +    28     7    37    25
> +
> +arg_in: 5 buffer int4[10] \
> +    67     0    38     6  \
> +    24    27    36    16  \
> +   100    89    23    30  \
> +     2    71    94    24  \
> +    25    48    39    20  \
> +    96    63    44    83  \
> +    54    14    45    99  \
> +     8    39    96    95  \
> +     5    60    22    32  \
> +    67    68    51    73
> +
> +arg_in: 6 buffer int4[10] \
> +    42    69    59    93  \
> +    49    90    91     6  \
> +    35    51    59    85  \
> +    18    32    89    65  \
> +     2    91    43    14  \
> +    69    70    99    96  \
> +    21    45    31    51  \
> +    39    27    69    28  \
> +    70    11    77    53  \
> +    72    95    46    94
> +
> +arg_in: 7 buffer int4[10] \
> +    85    53     9    66  \
> +    91    50    52    32  \
> +    41    84    27    41  \
> +    15    68    38    39  \
> +    95    41    13    89  \
> +    85    29    54    51  \
> +    89    44    47    81  \
> +    78    79    42    28  \
> +    55    59    33    71  \
> +    32    46    52    66
> +
> +arg_in: 8 buffer int4[10] \
> +    42    70    91    76  \
> +    99    49    26     9  \
> +    54    47    29    18  \
> +    50    14    59    85  \
> +     9    16     7    36  \
> +    10    41    58    88  \
> +    36    21   100    15  \
> +    19     1    19    99  \
> +    14    16    49    86  \
> +    40    61    99    15
> +
> +arg_in: 9 buffer int4[10] \
> +    26     4    74    33  \
> +    95     7    50    38  \
> +    15    57    81     3  \
> +    59    96    56    14  \
> +    25    13    79    45  \
> +    44    73    87    72  \
> +    63    62     0     0  \
> +    24    82    13    40  \
> +    82    56    74    31  \
> +    67    34    54    52
> +
> +!*/
> +
> +// The inline asm is necessary to defeat interprocedural sparse
> +// conditional constant propagation eliminating some of the trivial
> +// calls.
> +#ifdef __AMDGCN__
> +#define USE_ASM 1
> +#endif
> +
> +#define NOINLINE __attribute__((noinline))
> +
> +NOINLINE
> +void void_func_void(void)
> +{
> +#if USE_ASM
> +  __asm("");
> +#endif
> +}
> +
> +kernel void call_void_func_void(__global int* ret)
> +{
> +  void_func_void();
> +  *ret = 12345;
> +}
> +
> +NOINLINE
> +int i32_func_void(void)
> +{
> +    int ret;
> +#if USE_ASM
> +    __asm("v_mov_b32 %0, 0x12345" : "=v"(ret));
> +#else
> +    ret = 0x12345;
> +#endif
> +
> +    return ret;
> +}
> +
> +kernel void call_i32_func_void(__global int* ret)
> +{
> +    *ret = i32_func_void();
> +}
> +
> +NOINLINE
> +long i64_func_void(void)
> +{
> +    long ret;
> +#if USE_ASM
> +    __asm("v_lshlrev_b64 %0, 44, 1" : "=v"(ret));
> +#else
> +    ret = 1ull << 44;
> +#endif
> +    return ret;
> +}
> +
> +kernel void call_i64_func_void(__global long* ret)
> +{
> +    *ret = i64_func_void();
> +}
> +
> +
> +NOINLINE
> +int i32_func_void_callee_stack(void)
> +{
> +    int ret;
> +#if USE_ASM
> +    __asm("v_mov_b32 %0, 0x64" : "=v"(ret));
> +#else
> +    ret = 0x64;
> +#endif
> +
> +    volatile int alloca[20];
> +
> +    for (int i = 0; i < 20; ++i)
> +    {
> +        alloca[i] = i;
> +    }
> +
> +    for (int i = 0; i < 20; ++i)
> +    {
> +        ret += alloca[i];
> +    }
> +
> +    return ret;
> +}
> +
> +kernel void call_i32_func_void_callee_stack(__global int* ret)
> +{
> +    volatile int alloca[10];
> +
> +    for (int i = 0; i < 10; ++i)
> +    {
> +        alloca[i] = 0xffff;
> +    }
> +
> +
> +    *ret = i32_func_void_callee_stack();
> +}
> +
> +NOINLINE
> +int i32_func_p0i32_i32_caller_stack(volatile int* stack, int n)
> +{
> +    int ret;
> +#if USE_ASM
> +    __asm("v_mov_b32 %0, 0x64" : "=v"(ret));
> +#else
> +    ret = 0x64;
> +#endif
> +
> +    for (int i = 0; i < n; ++i)
> +    {
> +        ret += stack[i];
> +    }
> +
> +    return ret;
> +}
> +
> +kernel void call_i32_func_p0i32_i32_caller_stack(__global int* ret)
> +{
> +    volatile int alloca[10];
> +
> +    for (int i = 0; i < 10; ++i)
> +    {
> +        alloca[i] = 3 + i;
> +    }
> +
> +    *ret = i32_func_p0i32_i32_caller_stack(alloca, 10);
> +}
> +
> +NOINLINE
> +int i32_func_p0i32_i32_indirect_stack(volatile int* stack, int n)
> +{
> +    int ret;
> +#if USE_ASM
> +    __asm("v_mov_b32 %0, 0x64" : "=v"(ret));
> +#else
> +    ret = 0x64;
> +#endif
> +    for (int i = 0; i < n; ++i)
> +    {
> +        ret += stack[i];
> +    }
> +
> +    return ret;
> +}
> +
> +// Access a stack object in the parent kernel's frame.
> +NOINLINE
> +int i32_func_p0i32_i32_pass_kernel_stack(volatile int* stack, int n)
> +{
> +    int ret;
> +#if USE_ASM
> +    __asm("v_mov_b32 %0, 0x42" : "=v"(ret));
> +#else
> +    ret = 0x42;
> +#endif
> +
> +    volatile int local_object[10];
> +    for (int i = 0; i < 10; ++i)
> +        local_object[i] = -1;
> +
> +    ret += i32_func_p0i32_i32_indirect_stack(stack, n);
> +
> +    return ret;
> +}
> +
> +kernel void call_i32_func_p0i32_i32_indirect_kernel_stack(volatile __global int* ret)
> +{
> +    volatile int alloca[10];
> +
> +    for (int i = 0; i < 10; ++i)
> +    {
> +        alloca[i] = 3 + i;
> +    }
> +
> +    *ret = i32_func_p0i32_i32_pass_kernel_stack(alloca, 10);
> +}
> +
> +// Access a stack object in a parent non-kernel function's stack frame.
> +NOINLINE
> +int i32_func_void_pass_function_stack()
> +{
> +    int ret;
> +#if USE_ASM
> +    __asm("v_mov_b32 %0, 0x42" : "=v"(ret));
> +#else
> +    ret = 0x42;
> +#endif
> +
> +    volatile int local_object[10];
> +    for (int i = 0; i < 10; ++i)
> +        local_object[i] = 8 + i;
> +
> +    ret += i32_func_p0i32_i32_indirect_stack(local_object, 10);
> +    return ret;
> +}
> +
> +kernel void call_i32_func_p0i32_i32_indirect_function_stack(__global int* ret)
> +{
> +  *ret = i32_func_void_pass_function_stack();
> +}
> +
> +NOINLINE
> +int4 v4i32_func_v4i32_v4i32_stack(int4 arg0, int4 arg1)
> +{
> +    // Force stack usage.
> +    volatile int4 args[8] = { arg0, arg1 };
> +
> +    int4 total = 0;
> +    for (int i = 0; i < 8; ++i)
> +    {
> +        total += args[i];
> +    }
> +
> +    return total;
> +}
> +
> +// Make sure using stack in a callee function from a callee function
> +// doesn't corrupt caller's stack objects.
> +NOINLINE
> +int4 nested_stack_usage_v4i32_func_v4i32_v4i32(int4 arg0, int4 arg1)
> +{
> +    volatile int stack_object[4];
> +    for (int i = 0; i < 4; ++i) {
> +        const int test_val = 0x04030200 | i;
> +        stack_object[i] = test_val;
> +    }
> +
> +    arg0 *= 2;
> +
> +    int4 result = v4i32_func_v4i32_v4i32_stack(arg0, arg1);
> +
> +    // Check for stack corruption
> +    for (int i = 0; i < 4; ++i)
> +    {
> +        const int test_val = 0x04030200 | i;
> +        if (stack_object[i] != test_val)
> +            result = -1;
> +    }
> +
> +    return result;
> +}
> +
> +kernel void kernel_call_nested_stack_usage(global int4* output,
> +                                           global int4* input0,
> +                                           global int4* input1)
> +{
> +    int id = get_global_id(0);
> +    output[id] = nested_stack_usage_v4i32_func_v4i32_v4i32(
> +        input0[id],
> +        input1[id]);
> +}
> +
> +NOINLINE
> +int func_div_add(int x, int y)
> +{
> +    return x / y + 4;
> +}
> +
> +NOINLINE
> +int call_i32_func_i32_i32(int x, int y, volatile int* ptr)
> +{
> +    int tmp = func_div_add(x, y) >> 2;
> +    return tmp + *ptr;
> +}
> +
> +kernel void kernel_nested_calls(global int* output,
> +                                global int* input0,
> +                                global int* input1)
> +{
> +    int id = get_global_id(0);
> +    volatile int zero = 0;
> +    output[id] = call_i32_func_i32_i32(input0[id], input1[id], &zero);
> +}
> +
> +NOINLINE
> +int4 v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32(
> +    int4 arg0, int4 arg1, int4 arg2, int4 arg3,
> +    int4 arg4, int4 arg5, int4 arg6, int4 arg7,
> +    int4 arg8)
> +{
> +    // Try to make sure we can't clobber the incoming stack arguments
> +    // with local stack objects.
> +    volatile int4 args[8] = { arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 };
> +    volatile int4 last_arg = arg8;
> +
> +    int4 total = 0;
> +    for (int i = 0; i < 8; ++i)
> +    {
> +        total += args[i];
> +    }
> +
> +    return total * last_arg;
> +}
> +
> + // Test argument passed on stack, but doesn't use byval.
> +NOINLINE
> +int4 stack_arg_v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32(
> +    int4 arg0, int4 arg1, int4 arg2, int4 arg3,
> +    int4 arg4, int4 arg5, int4 arg6, int4 arg7,
> +    int4 arg8)
> +{
> +    volatile int stack_object[8];
> +    for (int i = 0; i < 8; ++i) {
> +        const int test_val = 0x04030200 | i;
> +        stack_object[i] = test_val;
> +    }
> +
> +    arg0 *= 2;
> +
> +    int4 result = v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32(
> +        arg0, arg1, arg2, arg3, arg4,
> +        arg5, arg6, arg7, arg8);
> +
> +    // Check for stack corruption.
> +    for (int i = 0; i < 8; ++i)
> +    {
> +        const int test_val = 0x04030200 | i;
> +        if (stack_object[i] != test_val)
> +            result = -1;
> +    }
> +
> +    return result;
> +}
> +
> +kernel void kernel_call_stack_arg(global int4* output,
> +                                  global int4* input0,
> +                                  global int4* input1,
> +                                  global int4* input2,
> +                                  global int4* input3,
> +                                  global int4* input4,
> +                                  global int4* input5,
> +                                  global int4* input6,
> +                                  global int4* input7,
> +                                  global int4* input8)
> +{
> +    int id = get_global_id(0);
> +
> +    volatile int stack_object[8];
> +    for (int i = 0; i < 8; ++i) {
> +        const int test_val = 0x05060700 | i;
> +        stack_object[i] = test_val;
> +    }
> +
> +    output[id] = stack_arg_v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32(
> +        input0[id],
> +        input1[id],
> +        input2[id],
> +        input3[id],
> +        input4[id],
> +        input5[id],
> +        input6[id],
> +        input7[id],
> +        input8[id]);
> +
> +    // Check for stack corruption.
> +    for (int i = 0; i < 8; ++i)
> +    {
> +        const int test_val = 0x05060700 | i;
> +        if (stack_object[i] != test_val)
> +            output[id] = -1;
> +    }
> +
> +}
> diff --git a/tests/cl/program/execute/tail-calls.cl b/tests/cl/program/execute/tail-calls.cl
> new file mode 100644
> index 000000000..cf33373cc
> --- /dev/null
> +++ b/tests/cl/program/execute/tail-calls.cl
> @@ -0,0 +1,305 @@
> +/*!
> +
> +[config]
> +name: tail calls
> +clc_version_min: 10
> +dimensions: 1
> +
> +[test]
> +name: Basic tail call
> +kernel_name: kernel_call_tailcall
> +global_size: 4 0 0
> +
> +arg_out: 0 buffer int[4] \
> +  4    11   107     -12
> +
> +arg_in: 1 buffer int[4] \
> +  0   100  1234  -912
> +
> +arg_in: 2 buffer int[4] \
> +  1    4      2    45
> +
> +[test]
> +name: Tail call with more arguments than caller
> +kernel_name: kernel_call_tailcall_extra_arg
> +global_size: 4 0 0
> +
> +arg_out: 0 buffer int[4] \
> +  2    112   1340   -882
> +
> +arg_in: 1 buffer int[4] \
> +  0   100  1234  -912
> +
> +arg_in: 2 buffer int[4] \
> +  1    4      2    45
> +
> +[test]
> +name: Tail call with fewer arguments than acller
> +kernel_name: kernel_call_tailcall_fewer_args
> +global_size: 4 0 0
> +
> +arg_out: 0 buffer int[4] \
> +  4    8   81   -10
> +
> +arg_in: 1 buffer int[4] \
> +  0   100  1234  -912
> +
> +arg_in: 2 buffer int[4] \
> +  1    4      2    45
> +
> +arg_in: 3 buffer int[4] \
> +  3    8      4    9
> +
> +[test]
> +name: Tail call with stack passed argument
> +kernel_name: kernel_call_tailcall_stack_passed_args
> +global_size: 10 0 0
> +
> +arg_out: 0 buffer int4[10] \
> + 11440  8762 10296 13156  \
> + 19649 31311 18081 24745  \
> + 10476 11772 17766 11070  \
> + 22165 18005 28665 35945  \
> +   624   938   768   990  \
> + 30618 28791 30240 31815  \
> + 49851 47676 46806 47676  \
> +  4400  4272  3392  2632  \
> + 10582  8712  8514  7854  \
> + 19737 21199 23865 18533  \
> +
> +
> +arg_in: 1 buffer int4[10] \
> +     0    13    76    46  \
> +     4    74    33    63  \
> +    26     9    95     7  \
> +    41    54    47    29  \
> +    15    68    38    39  \
> +    91    43    14    95  \
> +    44    83    69    70  \
> +    89    54    14    45  \
> +    77    63    21    21  \
> +    64    70    80    70
> +
> +arg_in: 2 buffer int4[10] \
> +    53    22     4    68  \
> +    76   100    36    24  \
> +    50    38    27    92  \
> +    18    15    57    81  \
> +    50    14    59    85  \
> +    41    13    89     9  \
> +    99    96    85    29  \
> +    99    21    45    31  \
> +     8    39    96    95  \
> +    76    67    63     5
> +
> +arg_in: 3 buffer int4[10] \
> +    68    94    38    52  \
> +    99    72    76    65  \
> +    53    46    95     5  \
> +     3    53    50    96  \
> +    59    96    56    14  \
> +    16     7    36    25  \
> +    54    51    10    41  \
> +    51    89    44    47  \
> +    39    27    69    28  \
> +    60    22    32    70
> +
> +arg_in: 4 buffer int4[10] \
> +    83     3     5    53  \
> +     7    63    89    27  \
> +    76    77    83    12  \
> +    75    56    89    63  \
> +    99    41    14    57  \
> +    13    79    45    35  \
> +    58    88    44    73  \
> +    81    36    21   100  \
> +    78    79    42    28  \
> +    11    77    53    55
> +
> +arg_in: 5 buffer int4[10] \
> +    67     0    38     6  \
> +    44    77    48    24  \
> +     1    69    87    63  \
> +    85    16    21    72  \
> +    25    49    46    97  \
> +    45    81    94    65  \
> +    87    72    80    71  \
> +    15    63    62     0  \
> +    19     1    19    99  \
> +    59    33    71    14
> +
> +arg_in: 6 buffer int4[10] \
> +    42    69    59    93  \
> +    27    36    16    49  \
> +    74    73   100    89  \
> +    13     9    27     0  \
> +    12    20    32    63  \
> +    21    68    91    25  \
> +    74     1    89    53  \
> +     0    78    73    32  \
> +    24    82    13    40  \
> +    16    49    86    82
> +
> +arg_in: 7 buffer int4[10] \
> +    85    53     9    66  \
> +    90    91     6    91  \
> +    23    30    35    51  \
> +    41     2    71    94  \
> +    12    65    62    81  \
> +    86    47    51    60  \
> +    46     6    72    49  \
> +    42    68    68    20  \
> +    60    17    83    15  \
> +    56    74    31    13
> +
> +arg_in: 8 buffer int4[10] \
> +    42    70    91    76  \
> +    50    52    32    99  \
> +    59    85    41    84  \
> +    24    18    32    89  \
> +    25    48    39    20  \
> +    82    76    46    96  \
> +    67    68    20    92  \
> +    84    71    83     9  \
> +    99    25    23    10  \
> +    53    31    59    52
> +
> +arg_in: 9 buffer int[10] \
> +   26  \
> +   49  \
> +   27  \
> +   65  \
> +    2  \
> +   63  \
> +   87  \
> +    8  \
> +   22  \
> +   43
> +
> +!*/
> +
> +#define NOINLINE __attribute__((noinline))
> +
> +NOINLINE
> +int i32_func_i32_i32(int x, int y)
> +{
> +    return x / y + 4;
> +}
> +
> +NOINLINE
> +int i32_func_i32_i32_i32(int x, int y, int z)
> +{
> +    return x / y + z;
> +}
> +
> +// Test a basic tail call
> +NOINLINE
> +int tailcall_i32_func_i32_i32(int x, int y)
> +{
> +    x += 5;
> +    y += 10;
> +    return i32_func_i32_i32(x, y);
> +}
> +
> +// Test a basic tail call with more arguments in the callee than
> +// caller.
> +NOINLINE
> +int tailcall_i32_func_i32_i32_extra_arg(int x, int y)
> +{
> +    int z = x + y + 1;
> +    x += 5;
> +    y += 10;
> +    return i32_func_i32_i32_i32(x, y, z);
> +}
> +
> +// Test a basic tail call with fewere arguments in the callee than
> +// caller.
> +NOINLINE
> +int tailcall_i32_func_i32_i32_i32_fewer_args(int x, int y, int z)
> +{
> +    x += 5;
> +    y += 10;
> +    return i32_func_i32_i32(x, y + z);
> +}
> +
> +kernel void kernel_call_tailcall(global int* output,
> +                                 global int* input0,
> +                                 global int* input1)
> +{
> +    int id = get_global_id(0);
> +    output[id] = tailcall_i32_func_i32_i32(input0[id], input1[id]);
> +}
> +
> +kernel void kernel_call_tailcall_extra_arg(global int* output,
> +                                           global int* input0,
> +                                           global int* input1)
> +{
> +    int id = get_global_id(0);
> +    output[id] = tailcall_i32_func_i32_i32_extra_arg(input0[id], input1[id]);
> +}
> +
> +kernel void kernel_call_tailcall_fewer_args(global int* output,
> +                                            global int* input0,
> +                                            global int* input1,
> +                                            global int* input2)
> +{
> +    int id = get_global_id(0);
> +    output[id] = tailcall_i32_func_i32_i32_i32_fewer_args(input0[id], input1[id], input2[id]);
> +}
> +
> +NOINLINE
> +int4 v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_i32(
> +    int4 arg0, int4 arg1, int4 arg2, int4 arg3,
> +    int4 arg4, int4 arg5, int4 arg6, int4 arg7,
> +    int arg8)
> +{
> +    // Try to make sure we can't clobber the incoming stack arguments
> +    // with local stack objects.
> +    volatile int4 args[8] = { arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 };
> +    volatile int scalar_arg = arg8;
> +
> +    int4 total = 0;
> +    for (int i = 0; i < 8; ++i)
> +    {
> +        total += args[i];
> +    }
> +
> +    return total * scalar_arg;
> +}
> +
> +// Test a basic tail call
> +NOINLINE
> +int4 tailcall_v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_i32(
> +    int4 arg0, int4 arg1, int4 arg2, int4 arg3,
> +    int4 arg4, int4 arg5, int4 arg6, int4 arg7,
> +    int arg8)
> +{
> +    arg0 *= 2;
> +    return v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_i32(
> +        arg0, arg1, arg2, arg3, arg4,
> +        arg5, arg6, arg7, arg8);
> +}
> +
> +kernel void kernel_call_tailcall_stack_passed_args(global int4* output,
> +                                                   global int4* input0,
> +                                                   global int4* input1,
> +                                                   global int4* input2,
> +                                                   global int4* input3,
> +                                                   global int4* input4,
> +                                                   global int4* input5,
> +                                                   global int4* input6,
> +                                                   global int4* input7,
> +                                                   global int* input8)
> +{
> +    int id = get_global_id(0);
> +    output[id] = tailcall_v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_i32(
> +        input0[id],
> +        input1[id],
> +        input2[id],
> +        input3[id],
> +        input4[id],
> +        input5[id],
> +        input6[id],
> +        input7[id],
> +        input8[id]);
> +}

-- 
Jan Vesely <jan.vesely at rutgers.edu>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part
URL: <https://lists.freedesktop.org/archives/piglit/attachments/20170919/5f79b516/attachment-0001.sig>


More information about the Piglit mailing list