[Beignet] [PATCH 1/2] support built-in function "upsample"

Song, Ruiling ruiling.song at intel.com
Fri Jul 5 00:53:06 PDT 2013


LGTM.

-----Original Message-----
From: beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org [mailto:beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org] On Behalf Of Homer Hsing
Sent: Friday, July 05, 2013 3:38 PM
To: beignet at lists.freedesktop.org
Cc: Xing, Homer
Subject: [Beignet] [PATCH 1/2] support built-in function "upsample"


Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
 backend/src/backend/gen_context.cpp        |  2 ++
 backend/src/backend/gen_encoder.cpp        | 24 +++++++++++++++++
 backend/src/backend/gen_encoder.hpp        |  2 ++
 backend/src/backend/gen_insn_selection.cpp |  8 ++++++
 backend/src/backend/gen_insn_selection.hxx |  2 ++
 backend/src/ir/instruction.cpp             |  2 ++
 backend/src/ir/instruction.hpp             |  4 +++
 backend/src/ir/instruction.hxx             |  2 ++
 backend/src/llvm/llvm_gen_backend.cpp      | 18 +++++++++++++
 backend/src/llvm/llvm_gen_ocl_function.hxx |  2 ++
 backend/src/ocl_stdlib.h                   | 41 ++++++++++++++++++++++++++++++
 11 files changed, 107 insertions(+)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index acd9c1d..e33d8da 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -169,6 +169,8 @@ namespace gbe
       case SEL_OP_ADD:  p->ADD(dst, src0, src1); break;
       case SEL_OP_MUL:  p->MUL(dst, src0, src1); break;
       case SEL_OP_MACH: p->MACH(dst, src0, src1); break;
+      case SEL_OP_UPSAMPLE_SHORT: p->UPSAMPLE_SHORT(dst, src0, src1); break;
+      case SEL_OP_UPSAMPLE_INT: p->UPSAMPLE_INT(dst, src0, src1); break;
       default: NOT_IMPLEMENTED;
     }
   }
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 25303b4..f84c6dd 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -812,6 +812,30 @@ namespace gbe
     pop();
   }
 
+  void GenEncoder::UPSAMPLE_SHORT(GenRegister dest, GenRegister src0, GenRegister src1) {
+    dest.type = GEN_TYPE_B;
+    dest.hstride = GEN_HORIZONTAL_STRIDE_2;
+    src0.type = GEN_TYPE_B;
+    src0.hstride = GEN_HORIZONTAL_STRIDE_2;
+    src1.type = GEN_TYPE_B;
+    src1.hstride = GEN_HORIZONTAL_STRIDE_2;
+    MOV(dest, src1);
+    dest.subnr ++;
+    MOV(dest, src0);
+  }
+
+  void GenEncoder::UPSAMPLE_INT(GenRegister dest, GenRegister src0, GenRegister src1) {
+    dest.type = GEN_TYPE_W;
+    dest.hstride = GEN_HORIZONTAL_STRIDE_2;
+    src0.type = GEN_TYPE_W;
+    src0.hstride = GEN_HORIZONTAL_STRIDE_2;
+    src1.type = GEN_TYPE_W;
+    src1.hstride = GEN_HORIZONTAL_STRIDE_2;
+    MOV(dest, src1);
+    dest.subnr += 2;
+    MOV(dest, src0);
+  }
+
   void GenEncoder::MOV_DF(GenRegister dest, GenRegister src0, GenRegister r) {
     int w = curr.execWidth;
     if (src0.isdf()) {
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index a7cbc89..d3a7165 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -92,6 +92,8 @@ namespace gbe
     ALU1(MOV)
     ALU1(FBH)
     ALU1(FBL)
+    ALU2(UPSAMPLE_SHORT)
+    ALU2(UPSAMPLE_INT)
     ALU1(RNDZ)
     ALU1(RNDE)
     ALU1(RNDD)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index bfe1e28..d4be8bf 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -438,6 +438,8 @@ namespace gbe
     ALU1(FBL)
     ALU3(HADD)
     ALU3(RHADD)
+    ALU2(UPSAMPLE_SHORT)
+    ALU2(UPSAMPLE_INT)
 #undef ALU1
 #undef ALU2
 #undef ALU3
@@ -1451,6 +1453,12 @@ namespace gbe
             sel.RHADD(dst, src0, src1, temp);
             break;
           }
+        case OP_UPSAMPLE_SHORT:
+          sel.UPSAMPLE_SHORT(dst, src0, src1);
+          break;
+        case OP_UPSAMPLE_INT:
+          sel.UPSAMPLE_INT(dst, src0, src1);
+          break;
         default: NOT_IMPLEMENTED;
       }
       sel.pop();
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index c85d328..33c3937 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -47,3 +47,5 @@ DECL_SELECTION_IR(FBH, UnaryInstruction)
 DECL_SELECTION_IR(FBL, UnaryInstruction)
 DECL_SELECTION_IR(HADD, TernaryInstruction)
 DECL_SELECTION_IR(RHADD, TernaryInstruction)
+DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction) 
+DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 21b82ce..2a77454 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1330,6 +1330,8 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
   DECL_EMIT_FUNCTION(SUB)
   DECL_EMIT_FUNCTION(SUBSAT)
   DECL_EMIT_FUNCTION(MUL_HI)
+  DECL_EMIT_FUNCTION(UPSAMPLE_SHORT)
+  DECL_EMIT_FUNCTION(UPSAMPLE_INT)
   DECL_EMIT_FUNCTION(DIV)
   DECL_EMIT_FUNCTION(REM)
   DECL_EMIT_FUNCTION(SHL)
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index fc1c984..48e6963 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -519,6 +519,10 @@ namespace ir {
   Instruction SIN(Type type, Register dst, Register src);
   /*! mul_hi.type dst src */
   Instruction MUL_HI(Type type, Register dst, Register src0, Register src1);
+  /*! upsample_short.type dst src */
+  Instruction UPSAMPLE_SHORT(Type type, Register dst, Register src0, Register src1);
+  /*! upsample_int.type dst src */
+  Instruction UPSAMPLE_INT(Type type, Register dst, Register src0, Register src1);
   /*! fbh.type dst src */
   Instruction FBH(Type type, Register dst, Register src);
   /*! fbl.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 0e1c575..b9f0e73 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -77,3 +77,5 @@ DECL_INSN(FBH, UnaryInstruction)
 DECL_INSN(FBL, UnaryInstruction)
 DECL_INSN(HADD, BinaryInstruction)
 DECL_INSN(RHADD, BinaryInstruction)
+DECL_INSN(UPSAMPLE_SHORT, BinaryInstruction)
+DECL_INSN(UPSAMPLE_INT, BinaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 260e3c1..564e441 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1829,6 +1829,8 @@ namespace gbe
       }
       case GEN_OCL_MUL_HI_INT:
       case GEN_OCL_MUL_HI_UINT:
+      case GEN_OCL_UPSAMPLE_SHORT:
+      case GEN_OCL_UPSAMPLE_INT:
       case GEN_OCL_SADD_SAT_CHAR:
       case GEN_OCL_SADD_SAT_SHORT:
       case GEN_OCL_SADD_SAT_INT:
@@ -2209,6 +2211,22 @@ namespace gbe
             ctx.MUL_HI(getUnsignedType(ctx, I.getType()), dst, src0, src1);
             break;
           }
+          case GEN_OCL_UPSAMPLE_SHORT:
+          {
+            GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+            GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+            const ir::Register dst = this->getRegister(&I);
+            ctx.UPSAMPLE_SHORT(getType(ctx, I.getType()), dst, src0, src1);
+            break;
+          }
+          case GEN_OCL_UPSAMPLE_INT:
+          {
+            GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+            GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+            const ir::Register dst = this->getRegister(&I);
+            ctx.UPSAMPLE_INT(getType(ctx, I.getType()), dst, src0, src1);
+            break;
+          }
           case GEN_OCL_SADD_SAT_CHAR:
           case GEN_OCL_SADD_SAT_SHORT:
           case GEN_OCL_SADD_SAT_INT:
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index f448a50..8e940bc 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -133,3 +133,5 @@ DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl)
 DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs)
 DECL_LLVM_GEN_FUNCTION(HADD, __gen_ocl_hadd)
 DECL_LLVM_GEN_FUNCTION(RHADD, __gen_ocl_rhadd)
+DECL_LLVM_GEN_FUNCTION(UPSAMPLE_SHORT, _Z18__gen_ocl_upsampless) 
+DECL_LLVM_GEN_FUNCTION(UPSAMPLE_INT, _Z18__gen_ocl_upsampleii)
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index bceac86..090e2b7 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -4480,6 +4480,47 @@ DEF(16)
 #undef DEC8
 #undef DEC16
 
+OVERLOADABLE short __gen_ocl_upsample(short hi, short lo);
+OVERLOADABLE int __gen_ocl_upsample(int hi, int lo);
+INLINE_OVERLOADABLE short upsample(char hi, uchar lo) { return __gen_ocl_upsample((short)hi, (short)lo); }
+INLINE_OVERLOADABLE ushort upsample(uchar hi, uchar lo) { return __gen_ocl_upsample((short)hi, (short)lo); }
+INLINE_OVERLOADABLE int upsample(short hi, ushort lo) { return __gen_ocl_upsample((int)hi, (int)lo); }
+INLINE_OVERLOADABLE uint upsample(ushort hi, ushort lo) { return __gen_ocl_upsample((int)hi, (int)lo); }
+#define DEC2(type, type2) INLINE_OVERLOADABLE type2##2 upsample(type##2 a, type##2 b) { return (type2##2)(upsample(a.s0, b.s0), upsample(a.s1, b.s1)); }
+#define DEC3(type, type2) INLINE_OVERLOADABLE type2##3 upsample(type##3 a, type##3 b) { return (type2##3)(upsample(a.s0, b.s0), upsample(a.s1, b.s1), upsample(a.s2, b.s2)); }
+#define DEC4(type, type2) INLINE_OVERLOADABLE type2##4 upsample(type##4 a, type##4 b) { return (type2##4)(upsample(a.s0, b.s0), upsample(a.s1, b.s1), upsample(a.s2, b.s2), upsample(a.s3, b.s3)); }
+#define DEC8(type, type2) INLINE_OVERLOADABLE type2##8 upsample(type##8 a, type##8 b) { return (type2##8)(upsample(a.s0, b.s0), upsample(a.s1, b.s1), upsample(a.s2, b.s2), upsample(a.s3, b.s3), upsample(a.s4, b.s4), upsample(a.s5, b.s5), upsample(a.s6, b.s6), upsample(a.s7, b.s7)); }
+#define DEC16(type, type2) INLINE_OVERLOADABLE type2##16 upsample(type##16 a, type##16 b) { return (type2##16)(upsample(a.s0, b.s0), upsample(a.s1, b.s1), upsample(a.s2, b.s2), upsample(a.s3, b.s3), upsample(a.s4, b.s4), upsample(a.s5, b.s5), upsample(a.s6, b.s6), upsample(a.s7, b.s7), upsample(a.s8, b.s8), upsample(a.s9, b.s9), upsample(a.sa, b.sa), upsample(a.sb, b.sb), upsample(a.sc, b.sc), upsample(a.sd, b.sd), upsample(a.se, b.se), upsample(a.sf, b.sf)); }
+#define DEF(n) DEC##n(uchar, ushort); DEC##n(ushort, uint)
+DEF(2)
+DEF(3)
+DEF(4)
+DEF(8)
+DEF(16)
+#undef DEF
+#undef DEC2
+#undef DEC3
+#undef DEC4
+#undef DEC8
+#undef DEC16
+#define DEC2(type, type2) INLINE_OVERLOADABLE type2##2 upsample(type##2 a, u##type##2 b) { return (type2##2)(upsample(a.s0, b.s0), upsample(a.s1, b.s1)); }
+#define DEC3(type, type2) INLINE_OVERLOADABLE type2##3 upsample(type##3 a, u##type##3 b) { return (type2##3)(upsample(a.s0, b.s0), upsample(a.s1, b.s1), upsample(a.s2, b.s2)); }
+#define DEC4(type, type2) INLINE_OVERLOADABLE type2##4 upsample(type##4 a, u##type##4 b) { return (type2##4)(upsample(a.s0, b.s0), upsample(a.s1, b.s1), upsample(a.s2, b.s2), upsample(a.s3, b.s3)); }
+#define DEC8(type, type2) INLINE_OVERLOADABLE type2##8 upsample(type##8 a, u##type##8 b) { return (type2##8)(upsample(a.s0, b.s0), upsample(a.s1, b.s1), upsample(a.s2, b.s2), upsample(a.s3, b.s3), upsample(a.s4, b.s4), upsample(a.s5, b.s5), upsample(a.s6, b.s6), upsample(a.s7, b.s7)); }
+#define DEC16(type, type2) INLINE_OVERLOADABLE type2##16 upsample(type##16 a, u##type##16 b) { return (type2##16)(upsample(a.s0, b.s0), upsample(a.s1, b.s1), upsample(a.s2, b.s2), upsample(a.s3, b.s3), upsample(a.s4, b.s4), upsample(a.s5, b.s5), upsample(a.s6, b.s6), upsample(a.s7, b.s7), upsample(a.s8, b.s8), upsample(a.s9, b.s9), upsample(a.sa, b.sa), upsample(a.sb, b.sb), upsample(a.sc, b.sc), upsample(a.sd, b.sd), upsample(a.se, b.se), upsample(a.sf, b.sf)); }
+#define DEF(n) DEC##n(char, short); DEC##n(short, int)
+DEF(2)
+DEF(3)
+DEF(4)
+DEF(8)
+DEF(16)
+#undef DEF
+#undef DEC2
+#undef DEC3
+#undef DEC4
+#undef DEC8
+#undef DEC16
+
 PURE CONST uint __gen_ocl_hadd(uint x, uint y);
 PURE CONST uint __gen_ocl_rhadd(uint x, uint y);
 #define DEC DEF(char); DEF(uchar); DEF(short); DEF(ushort)
--
1.8.1.2

_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list