[Beignet] [PATCH 8/9 V3] Backend: Delete bswap logic in the llvm_to_gen stage.

junyan.he at inbox.com junyan.he at inbox.com
Mon Mar 9 01:11:28 PDT 2015


From: Junyan He <junyan.he at linux.intel.com>

We move the bswap logic from llvm_to_gen to the backend for
efficiency, using indirect mode.

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 backend/src/ir/instruction.hpp        |    2 +
 backend/src/ir/instruction.hxx        |    1 +
 backend/src/llvm/llvm_gen_backend.cpp |   85 +--------------------------------
 3 files changed, 5 insertions(+), 83 deletions(-)

diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 6963111..24d27aa 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -620,6 +620,8 @@ namespace ir {
   Instruction RNDU(Type type, Register dst, Register src);
   /*! rndz.type dst src */
   Instruction RNDZ(Type type, Register dst, Register src);
+  /*! bswap.type dst src */
+  Instruction BSWAP(Type type, Register dst, Register src);
   /*! pow.type dst src0 src1 */
   Instruction POW(Type type, Register dst, Register src0, Register src1);
   /*! mul.type dst src0 src1 */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index b52673e..de4abfb 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -40,6 +40,7 @@ DECL_INSN(RNDU, UnaryInstruction)
 DECL_INSN(RNDZ, UnaryInstruction)
 DECL_INSN(SIMD_ANY, UnaryInstruction)
 DECL_INSN(SIMD_ALL, UnaryInstruction)
+DECL_INSN(BSWAP, UnaryInstruction)
 DECL_INSN(POW, BinaryInstruction)
 DECL_INSN(MUL, BinaryInstruction)
 DECL_INSN(ADD, BinaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index aad638f..74c80ee 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2943,89 +2943,6 @@ namespace gbe
           case Intrinsic::umul_with_overflow:
           NOT_IMPLEMENTED;
           break;
-          case Intrinsic::bswap:
-          {
-            // FIXME, this is an unoptimized version, could be optimized by
-            // leveraging GEN's register region/indirect address feature.
-            Type *llvmDstType = I.getType();
-            uint32_t elementSize = getTypeByteSize(unit, llvmDstType);
-
-            const ir::Register dst0  = this->getRegister(&I);
-            const ir::Register src0 = this->getRegister(I.getOperand(0));
-            switch(elementSize)
-            {
-              case 2:
-                {
-                  ir::Type srcType = getUnsignedType(ctx, llvmDstType);
-                  ir::Register tmp1 = ctx.reg(getFamily(srcType));
-                  ir::Register tmp2 = ctx.reg(getFamily(srcType));
-
-                  ir::Register regWMask = ctx.reg( ir::FAMILY_WORD );
-                  const ir::ImmediateIndex wMask = ctx.newIntegerImmediate(0x00FF, ir::TYPE_S16);
-                  ir::Register regShift = ctx.reg( ir::FAMILY_WORD );
-                  const ir::ImmediateIndex shift = ctx.newIntegerImmediate(8, ir::TYPE_S16);
-
-                  ctx.LOADI(ir::TYPE_S16, regWMask, wMask);
-                  ctx.AND(srcType, tmp1, src0, regWMask);
-
-                  ctx.LOADI(ir::TYPE_S16, regShift, shift);
-                  ctx.SHL(srcType, tmp2, tmp1, regShift);
-
-                  ir::Register tmp3 = ctx.reg( getFamily(srcType) );
-                  ctx.SHR(srcType, tmp3, src0, regShift);
-
-                  ctx.OR(srcType, dst0, tmp2, tmp3);
-                }
-                break;
-              case 4:
-                {
-                  ir::Type srcType = getType(ctx, llvmDstType);
-                  ir::Register tmp1 = ctx.reg(getFamily(srcType));
-                  ir::Register tmp2 = ctx.reg(getFamily(srcType));
-                  ir::Register tmp3 = ctx.reg(getFamily(srcType));
-                  ir::Register tmp4 = ctx.reg(getFamily(srcType));
-                  ir::Register tmp5 = ctx.reg(getFamily(srcType));
-                  ir::Register tmp6 = ctx.reg(getFamily(srcType));
-
-                  ir::Register regDWMask = ctx.reg( ir::FAMILY_DWORD );
-                  ir::Register regShift_8 = ctx.reg( ir::FAMILY_DWORD );
-                  ir::Register regShift_24 = ctx.reg( ir::FAMILY_DWORD );
-                  ir::ImmediateIndex wMask_L = ctx.newIntegerImmediate(0x0000FF00, ir::TYPE_S32);
-                  ir::ImmediateIndex wMask_H = ctx.newIntegerImmediate(0x00FF0000, ir::TYPE_S32);
-                  ir::ImmediateIndex shift_8 = ctx.newIntegerImmediate(8, ir::TYPE_S32);
-                  ir::ImmediateIndex shift_24 = ctx.newIntegerImmediate(24, ir::TYPE_S32);
-
-                  ctx.LOADI(ir::TYPE_S32, regShift_24, shift_24);
-                  ctx.SHL(srcType, tmp1, src0, regShift_24);
-
-                  ctx.LOADI(ir::TYPE_S32, regDWMask, wMask_L);
-                  ctx.AND(srcType, tmp2, src0, regDWMask);
-                  ctx.LOADI(ir::TYPE_S32, regShift_8, shift_8);
-                  ctx.SHL(srcType, tmp3, tmp2, regShift_8);
-
-                  ctx.LOADI(ir::TYPE_S32, regDWMask, wMask_H);
-                  ctx.AND(srcType, tmp4, src0, regDWMask);
-                  ctx.LOADI(ir::TYPE_S32, regShift_8, shift_8);
-                  ctx.SHR(makeTypeUnsigned(srcType), tmp5, tmp4, regShift_8);
-
-                  ctx.LOADI(ir::TYPE_S32, regShift_24, shift_24);
-                  ctx.SHR(makeTypeUnsigned(srcType), tmp6, src0, regShift_24);
-
-                  ir::Register tmp7 = ctx.reg(getFamily(srcType));
-                  ir::Register tmp8 = ctx.reg(getFamily(srcType));
-                  ctx.OR(srcType, tmp7, tmp1, tmp3);
-                  ctx.OR(srcType, tmp8, tmp5, tmp6);
-                  ctx.OR(srcType, dst0, tmp7, tmp8);
-                }
-                break;
-              case 8:
-                NOT_IMPLEMENTED;
-                break;
-              default:
-                GBE_ASSERT(0);
-            }
-          }
-          break;
           case Intrinsic::ctlz:
           {
             Type *llvmDstType = I.getType();
@@ -3085,6 +3002,8 @@ namespace gbe
           case Intrinsic::cos: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
           case Intrinsic::log2: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
           case Intrinsic::exp2: this->emitUnaryCallInst(I,CS,ir::OP_EXP); break;
+          case Intrinsic::bswap:
+            this->emitUnaryCallInst(I,CS,ir::OP_BSWAP, getUnsignedType(ctx, I.getType())); break;
           default: NOT_IMPLEMENTED;
         }
       } else {
-- 
1.7.9.5



More information about the Beignet mailing list