[Beignet] [PATCH 1/2] reimplement the LZD instruction in backend.

xionghu.luo at intel.com xionghu.luo at intel.com
Fri Jan 23 00:14:54 PST 2015


From: Luo Xionghu <xionghu.luo at intel.com>

handle the byte/word/dword/qword input accordingly.

Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
 backend/src/llvm/llvm_gen_backend.cpp | 81 ++++++++++++++++++++++++++++++++++-
 1 file changed, 79 insertions(+), 2 deletions(-)

diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 86030b9..559c13b 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -3286,10 +3286,87 @@ error:
           break;
           case Intrinsic::ctlz:
           {
-            ir::Type srcType = getType(ctx, I.getType());
+            Type *llvmDstType = I.getType();
+            ir::Type dstType = getType(ctx, llvmDstType);
+            Type *llvmSrcType = I.getOperand(0)->getType();
+            ir::Type srcType = getUnsignedType(ctx, llvmSrcType);
+
             const ir::Register dst = this->getRegister(&I);
             const ir::Register src = this->getRegister(I.getOperand(0));
-            ctx.ALU1(ir::OP_LZD, srcType, dst, src);
+            if(srcType == ir::TYPE_U16) {
+              ir::ImmediateIndex imm;
+              ir::Type tmpType = ir::TYPE_S32;
+              const ir::RegisterFamily family = getFamily(tmpType);
+              imm = ctx.newIntegerImmediate(16, tmpType);
+              const ir::Register immReg = ctx.reg(family);
+              ctx.LOADI(ir::TYPE_S32, immReg, imm);
+
+              ir::Register tmp0 = ctx.reg(getFamily(tmpType));
+              ir::Register tmp1 = ctx.reg(getFamily(tmpType));
+              ir::Register tmp2 = ctx.reg(getFamily(tmpType));
+              ctx.CVT(tmpType, srcType, tmp0, src);
+              ctx.ALU1(ir::OP_LZD, tmpType, tmp1, tmp0);
+              ctx.SUB(tmpType, tmp2, tmp1, immReg);
+              ctx.CVT(dstType, tmpType, dst, tmp2);
+            }else if(srcType == ir::TYPE_U8) {
+              ir::ImmediateIndex imm;
+              ir::Type tmpType = ir::TYPE_U32;
+              const ir::RegisterFamily family = getFamily(tmpType);
+              imm = ctx.newIntegerImmediate(24, tmpType);
+              const ir::Register immReg = ctx.reg(family);
+              ctx.LOADI(ir::TYPE_S32, immReg, imm);
+
+              ir::Register tmp0 = ctx.reg(getFamily(tmpType));
+              ir::Register tmp1 = ctx.reg(getFamily(tmpType));
+              ir::Register tmp2 = ctx.reg(getFamily(tmpType));
+              ctx.CVT(tmpType, srcType, tmp0, src);
+              ctx.ALU1(ir::OP_LZD, tmpType, tmp1, tmp0);
+              ctx.SUB(tmpType, tmp2, tmp1, immReg);
+              ctx.CVT(dstType, tmpType, dst, tmp2);
+            }else if(srcType == ir::TYPE_U64) {
+              ir::ImmediateIndex imm;
+              ir::Type tmpType = ir::TYPE_U32;
+              const ir::RegisterFamily family = getFamily(srcType);
+              imm = ctx.newIntegerImmediate(32, srcType);
+              const ir::Register immReg = ctx.reg(family);
+              ctx.LOADI(ir::TYPE_S64, immReg, imm);
+
+              ir::ImmediateIndex immMask_L = ctx.newIntegerImmediate(0xFFFFFFFF, srcType);
+              const ir::Register regMask_L = ctx.reg(family);
+              ctx.LOADI(ir::TYPE_S64, regMask_L, immMask_L);
+              ir::Register qwSrc_L = ctx.reg(getFamily(srcType));
+              ctx.AND(srcType, qwSrc_L, src, regMask_L);
+
+              const ir::ImmediateIndex imm32 = ctx.newIntegerImmediate(32, tmpType);
+              const ir::Register imm32Reg = ctx.reg(tmpFamily);
+              ctx.LOADI(ir::TYPE_S32, imm32Reg, imm32);
+
+              ir::Register tmp0 = ctx.reg(getFamily(srcType));
+              ir::Register tmp1 = ctx.reg(getFamily(tmpType));
+              ir::Register tmp2 = ctx.reg(getFamily(tmpType));
+              ir::Register tmp3 = ctx.reg(getFamily(tmpType));
+              ir::Register tmp4 = ctx.reg(getFamily(tmpType));
+              ir::Register tmp5 = ctx.reg(getFamily(tmpType));
+              ir::Register tmp6 = ctx.reg(getFamily(tmpType));
+              ir::Register cmp = ctx.reg(ir::FAMILY_BOOL);
+
+              ctx.SHR(srcType, tmp0, src, immReg);
+              ctx.CVT(tmpType, srcType, tmp1, tmp0);
+
+              ctx.ALU1(ir::OP_LZD, tmpType, tmp2, tmp1);
+              ctx.LT(tmpType, cmp, tmp2, imm32Reg);
+
+              ctx.CVT(tmpType, srcType, tmp3, qwSrc_L);
+              ctx.ALU1(ir::OP_LZD, tmpType, tmp4, tmp3);
+              ctx.ADD(tmpType, tmp5, tmp4, imm32Reg);
+
+              ctx.SEL(tmpType, tmp6, cmp, tmp2, tmp5);
+              ctx.CVT(dstType, tmpType, dst, tmp6);
+            }
+            else
+            {
+              ctx.ALU1(ir::OP_LZD, dstType, dst, src);
+            }
           }
           break;
           case Intrinsic::fma:
-- 
1.9.1



More information about the Beignet mailing list