[Mesa-dev] [PATCH] libclc: implement rotate builtin

Tom Stellard tom at stellard.net
Sat Mar 23 20:48:14 PDT 2013


On Sat, Mar 23, 2013 at 12:37:38PM -0500, Aaron Watry wrote:
> This implementation does a lot of bit shifting and masking. Suffice to say,
> this is somewhat suboptimal... but it does look to produce correct results
> (after the piglit tests were corrected for sign extension issues).
> 
> Someone who knows LLVM better than I could re-write this more efficiently.

Hi Aaron,

I haven't test this code at all, but there a few things you can do to
check if your rotate implementation is producing optimal code.  The first is
to compile your cl code with the -debug-only=isel option (I'm not sure
if clang accepts this option, if it doesn't you can compile your cl code
to LLVM IR and then compile it to R600 assembly with llc.  I know llc accepts
this option).  This option will dump the instruction selection DAG, and
if you see any rotl nodes, then you know your implementation is
optimized.

If you don't see any rotl nodes in the isel dump, take a look at the
MatchRotate() function in llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
this will tell you which patterns are lowered to rotl.

On R600, rotl nodes should be lowered to BFE instructions, and I think
there is even a test in llvm/test/CodeGen/R600 for this.  However, I
don't think we do this for any types other than i32 yet.

-Tom

> ---
>  generic/include/clc/clc.h               |    1 +
>  generic/include/clc/integer/gentype.inc |   11 ++++++++++
>  generic/include/clc/integer/rotate.h    |    2 ++
>  generic/include/clc/integer/rotate.inc  |    1 +
>  generic/lib/SOURCES                     |    1 +
>  generic/lib/integer/rotate.cl           |    4 ++++
>  generic/lib/integer/rotate.inc          |   35 +++++++++++++++++++++++++++++++
>  7 files changed, 55 insertions(+)
>  create mode 100644 generic/include/clc/integer/rotate.h
>  create mode 100644 generic/include/clc/integer/rotate.inc
>  create mode 100644 generic/lib/integer/rotate.cl
>  create mode 100644 generic/lib/integer/rotate.inc
> 
> diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
> index c3d7d59..72f518a 100644
> --- a/generic/include/clc/clc.h
> +++ b/generic/include/clc/clc.h
> @@ -63,6 +63,7 @@
>  #include <clc/integer/abs.h>
>  #include <clc/integer/abs_diff.h>
>  #include <clc/integer/add_sat.h>
> +#include <clc/integer/rotate.h>
>  #include <clc/integer/sub_sat.h>
>  
>  /* 6.11.2 and 6.11.3 Shared Integer/Math Functions */
> diff --git a/generic/include/clc/integer/gentype.inc b/generic/include/clc/integer/gentype.inc
> index 0b32efd..005b9af 100644
> --- a/generic/include/clc/integer/gentype.inc
> +++ b/generic/include/clc/integer/gentype.inc
> @@ -1,3 +1,4 @@
> +#define GENSIZE 8
>  #define GENTYPE char
>  #define UGENTYPE uchar
>  #define SGENTYPE char
> @@ -94,6 +95,9 @@
>  #undef UGENTYPE
>  #undef SGENTYPE
>  
> +#undef GENSIZE
> +#define GENSIZE 16
> +
>  #define GENTYPE short
>  #define UGENTYPE ushort
>  #define SGENTYPE short
> @@ -190,6 +194,9 @@
>  #undef UGENTYPE
>  #undef SGENTYPE
>  
> +#undef GENSIZE
> +#define GENSIZE 32
> +
>  #define GENTYPE int
>  #define UGENTYPE uint
>  #define SGENTYPE int
> @@ -286,6 +293,9 @@
>  #undef UGENTYPE
>  #undef SGENTYPE
>  
> +#undef GENSIZE
> +#define GENSIZE 64
> +
>  #define GENTYPE long
>  #define UGENTYPE ulong
>  #define SGENTYPE long
> @@ -382,4 +392,5 @@
>  #undef UGENTYPE
>  #undef SGENTYPE
>  
> +#undef GENSIZE
>  #undef BODY
> diff --git a/generic/include/clc/integer/rotate.h b/generic/include/clc/integer/rotate.h
> new file mode 100644
> index 0000000..e163bc8
> --- /dev/null
> +++ b/generic/include/clc/integer/rotate.h
> @@ -0,0 +1,2 @@
> +#define BODY <clc/integer/rotate.inc>
> +#include <clc/integer/gentype.inc>
> diff --git a/generic/include/clc/integer/rotate.inc b/generic/include/clc/integer/rotate.inc
> new file mode 100644
> index 0000000..5720e1c
> --- /dev/null
> +++ b/generic/include/clc/integer/rotate.inc
> @@ -0,0 +1 @@
> +_CLC_OVERLOAD _CLC_DECL GENTYPE rotate(GENTYPE x, GENTYPE y);
> diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
> index f639c83..495b3e7 100644
> --- a/generic/lib/SOURCES
> +++ b/generic/lib/SOURCES
> @@ -8,6 +8,7 @@ integer/abs_diff.cl
>  integer/add_sat.cl
>  integer/add_sat.ll
>  integer/add_sat_impl.ll
> +integer/rotate.cl
>  integer/sub_sat.cl
>  integer/sub_sat.ll
>  integer/sub_sat_impl.ll
> diff --git a/generic/lib/integer/rotate.cl b/generic/lib/integer/rotate.cl
> new file mode 100644
> index 0000000..d7eff2b
> --- /dev/null
> +++ b/generic/lib/integer/rotate.cl
> @@ -0,0 +1,4 @@
> +#include <clc/clc.h>
> +
> +#define BODY <rotate.inc>
> +#include <clc/integer/gentype.inc>
> diff --git a/generic/lib/integer/rotate.inc b/generic/lib/integer/rotate.inc
> new file mode 100644
> index 0000000..e83dd51
> --- /dev/null
> +++ b/generic/lib/integer/rotate.inc
> @@ -0,0 +1,35 @@
> +/**
> + * Not necessarily optimal... but it produces correct results (at least for int)
> + * If we're lucky, LLVM will recognize the pattern and produce rotate
> + * instructions:
> + * http://llvm.1065342.n5.nabble.com/rotate-td47679.html
> + * 
> + * Eventually, someone should feel free to implement an llvm-specific version
> + */
> +
> +_CLC_OVERLOAD _CLC_DEF GENTYPE rotate(GENTYPE x, GENTYPE n){
> +    //Try to avoid extra work if someone's spinning the value through multiple
> +    //full rotations
> +    n = n % (GENTYPE)GENSIZE;
> +    
> +    //Determine if we're doing a right or left shift on each component
> +    //The actual shift algorithm is based on a rotate right
> +    //e.g. a rotate of int by 5 bits becomes rotate right by 26 bits
> +    //     and a rotate of int by -4 bits becomes rotate right by 4
> +    GENTYPE amt = (n > (GENTYPE)0 ? (GENTYPE)GENSIZE - n : (GENTYPE)0 - n );
> +    
> +    //Calculate the bits that will wrap
> +    GENTYPE mask = ( (GENTYPE)1 << amt ) - (GENTYPE)1;
> +    GENTYPE wrapped_bits = x & mask;
> +    
> +    //Shift the input value right and then AND a mask that eliminates
> +    //sign-extension interference
> +    //if the rotate amount is 0, just use ~0 for a mask
> +    GENTYPE se_mask = !amt ? ~((GENTYPE)0) : 
> +        ( ( (GENTYPE)1 << ((GENTYPE)GENSIZE - amt) ) - (GENTYPE)1 );
> +    GENTYPE unwrapped_bits = x >> amt;
> +    unwrapped_bits &= se_mask;
> +    
> +    //Finally shift the input right after moving the wrapped bits into position
> +    return unwrapped_bits | (wrapped_bits << ( (GENTYPE)GENSIZE - amt ) );
> +}
> \ No newline at end of file
> -- 
> 1.7.10.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list