[Beignet] [PATCH 1/2] enable llvm intrinsic call bswap function.
Zhigang Gong
zhigang.gong at linux.intel.com
Tue Nov 4 03:03:01 PST 2014
It would be better to put a FIXME tag in the code to indicate that there
should be a better implementation than this unoptimized version.
The optimized version could leverage Gen's register region/indirect
addressing to optimize the byte swap operations. Let's defer this
type of optimization to the next release.
Thanks for the patch. Will push later.
On Tue, Nov 04, 2014 at 06:42:35AM +0800, xionghu.luo at intel.com wrote:
> From: Luo Xionghu <xionghu.luo at intel.com>
>
> this intrinsic call is implemented at the GEN IR level currently,
> and should be optimized later.
>
> Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
> ---
> backend/src/llvm/llvm_gen_backend.cpp | 87 +++++++++++++++++++++++++++++++++
> 1 file changed, 87 insertions(+)
>
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index feb881d..4b6cfe7 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -2827,6 +2827,93 @@ namespace gbe
> NOT_IMPLEMENTED;
> break;
> case Intrinsic::bswap:
> + {
> + Type *llvmDstType = I.getType();
> + uint32_t elementSize = getTypeByteSize(unit, llvmDstType);
> +
> + const ir::Register dst0 = this->getRegister(&I);
> + const ir::Register src0 = this->getRegister(I.getOperand(0));
> + switch(elementSize)
> + {
> + case 2:
> + {
> + ir::Type srcType = getUnsignedType(ctx, llvmDstType);
> + ir::Register tmp1 = ctx.reg(getFamily(srcType));
> + ir::Register tmp2 = ctx.reg(getFamily(srcType));
> +
> + ir::Register regWMask = ctx.reg( ir::FAMILY_WORD );
> + const ir::ImmediateIndex wMask = ctx.newIntegerImmediate(0x00FF, ir::TYPE_S16);
> + ir::Register regShift = ctx.reg( ir::FAMILY_WORD );
> + const ir::ImmediateIndex shift = ctx.newIntegerImmediate(8, ir::TYPE_S16);
> +
> + ctx.LOADI(ir::TYPE_S16, regWMask, wMask);
> + ctx.AND(srcType, tmp1, src0, regWMask);
> +
> + ctx.LOADI(ir::TYPE_S16, regShift, shift);
> + ctx.SHL(srcType, tmp2, tmp1, regShift);
> +
> + ir::Register tmp3 = ctx.reg( getFamily(srcType) );
> + ctx.SHR(srcType, tmp3, src0, regShift);
> +
> + ctx.OR(srcType, dst0, tmp2, tmp3);
> + }
> + break;
> + case 4:
> + {
> + ir::Type srcType = getUnsignedType(ctx, llvmDstType);
> + ir::Register tmp1 = ctx.reg(getFamily(srcType));
> + ir::Register tmp2 = ctx.reg(getFamily(srcType));
> + ir::Register tmp3 = ctx.reg(getFamily(srcType));
> + ir::Register tmp4 = ctx.reg(getFamily(srcType));
> + ir::Register tmp5 = ctx.reg(getFamily(srcType));
> + ir::Register tmp6 = ctx.reg(getFamily(srcType));
> + ir::Register tmp7 = ctx.reg(getFamily(srcType));
> + ir::Register tmp8 = ctx.reg(getFamily(srcType));
> +
> + ir::Register regDWMask = ctx.reg( ir::FAMILY_DWORD );
> + ir::Register regShift = ctx.reg( ir::FAMILY_DWORD );
> + ir::ImmediateIndex wMask = ctx.newIntegerImmediate(0x000000FF, ir::TYPE_S32);
> + ir::ImmediateIndex shift = ctx.newIntegerImmediate(24, ir::TYPE_S32);
> + ctx.LOADI(ir::TYPE_S32, regDWMask, wMask);
> + ctx.AND(srcType, tmp1, src0, regDWMask);
> + ctx.LOADI(ir::TYPE_S32, regShift, shift);
> + ctx.SHL(srcType, tmp2, tmp1, regShift);
> +
> + wMask = ctx.newIntegerImmediate(0x0000FF00, ir::TYPE_S32);
> + shift = ctx.newIntegerImmediate(8, ir::TYPE_S32);
> + ctx.LOADI(ir::TYPE_S32, regDWMask, wMask);
> + ctx.AND(srcType, tmp3, src0, regDWMask);
> + ctx.LOADI(ir::TYPE_S32, regShift, shift);
> + ctx.SHL(srcType, tmp4, tmp3, regShift);
> +
> + wMask = ctx.newIntegerImmediate(0x00FF0000, ir::TYPE_S32);
> + shift = ctx.newIntegerImmediate(8, ir::TYPE_S32);
> + ctx.LOADI(ir::TYPE_S32, regDWMask, wMask);
> + ctx.AND(srcType, tmp5, src0, regDWMask);
> + ctx.LOADI(ir::TYPE_S32, regShift, shift);
> + ctx.SHR(srcType, tmp6, tmp5, regShift);
> +
> + wMask = ctx.newIntegerImmediate(0xFF000000, ir::TYPE_S32);
> + shift = ctx.newIntegerImmediate(24, ir::TYPE_S32);
> + ctx.LOADI(ir::TYPE_S32, regDWMask, wMask);
> + ctx.AND(srcType, tmp7, src0, regDWMask);
> + ctx.LOADI(ir::TYPE_S32, regShift, shift);
> + ctx.SHR(srcType, tmp8, tmp7, regShift);
> +
> + ir::Register tmp9 = ctx.reg(getFamily(srcType));
> + ir::Register tmp10 = ctx.reg(getFamily(srcType));
> + ctx.OR(srcType, tmp9, tmp2, tmp4);
> + ctx.OR(srcType, tmp10, tmp6, tmp8);
> + ctx.OR(srcType, dst0, tmp9, tmp10);
> + }
> + break;
> + case 8:
> + NOT_IMPLEMENTED;
> + break;
> + default:
> + GBE_ASSERT(0);
> + }
> + }
> break;
> default: NOT_IMPLEMENTED;
> }
> --
> 1.7.9.5
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list