[Beignet] [PATCH 1/3] add extension cl_intel_media_block_io READ related function

Pan, Xiuli xiuli.pan at intel.com
Wed Mar 8 05:56:02 UTC 2017


1. A #define is missing in the libocl header.
2. The extension should be added in this patch; otherwise, in the utest patch.
3. In the IR MediaBlockReadInstruction, the width and height parameters no longer need default values.
Otherwise LGTM.

-----Original Message-----
From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of xionghu.luo at intel.com
Sent: Friday, March 3, 2017 12:08 AM
To: beignet at lists.freedesktop.org
Cc: Luo, Xionghu <xionghu.luo at intel.com>
Subject: [Beignet] [PATCH 1/3] add extension cl_intel_media_block_io READ related function

From: Luo Xionghu <xionghu.luo at intel.com>

Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp |  55 ++++++++++++--
 backend/src/ir/instruction.cpp             |  14 +++-
 backend/src/ir/instruction.hpp             |   4 +-
 backend/src/libocl/tmpl/ocl_simd.tmpl.cl   | 117 ++++++++++++++++++++++++-----
 backend/src/libocl/tmpl/ocl_simd.tmpl.h    |  17 +++++
 backend/src/llvm/llvm_gen_backend.cpp      |  89 +++++++++++++++++++++-
 backend/src/llvm/llvm_gen_ocl_function.hxx |   6 ++
 backend/src/llvm/llvm_scalarize.cpp        |   5 ++
 8 files changed, 274 insertions(+), 33 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 1cab40c..cabc6a3 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -7811,25 +7811,56 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
   /*! Media Block Read pattern */
   DECL_PATTERN(MediaBlockReadInstruction)
   {
+    uint32_t fixBlockSize(const ir::MediaBlockReadInstruction &insn, uint32_t typeSize, uint32_t simdWidth, uint32_t &block_width) const
+    {
+      uint8_t width = insn.getWidth();
+      uint8_t height = insn.getHeight();
+      uint32_t vec_size = insn.getVectorSize();
+      uint32_t blocksize = 0;
+      if (width && height) {
+        if (width * height * typeSize > vec_size * simdWidth * typeSize) {
+          if (width <= simdWidth * vec_size) {
+            height = vec_size * simdWidth / width;
+          } else {
+            height = 1;
+            width = vec_size * simdWidth / height;
+          }
+        }
+      }else {
+        width = simdWidth;
+        height = vec_size;
+      }
+      block_width = typeSize * (width < simdWidth ? width : simdWidth);
+      blocksize = (block_width - 1) % 32 | (height - 1) << 16;
+      return blocksize;
+    }
+
     bool emitOne(Selection::Opaque &sel, const ir::MediaBlockReadInstruction &insn, bool &markChildren) const
     {
       using namespace ir;
       uint32_t vec_size = insn.getVectorSize();
       uint32_t simdWidth = sel.curr.execWidth;
       const Type type = insn.getType();
-      const uint32_t typeSize = type == TYPE_U32 ? 4 : 2;
+      uint32_t typeSize = 0;
+      if(type == TYPE_U32) {
+        typeSize = 4;
+      }else if(type == TYPE_U16) {
+        typeSize = 2;
+      }else if(type == TYPE_U8) {
+        typeSize = 1;
+      }else
+        NOT_IMPLEMENTED;
       uint32_t response_size = simdWidth * vec_size * typeSize / 32;
       // ushort in simd8 will have half reg thus 0.5 reg size, but response lenght is still 1
       response_size = response_size ? response_size : 1;
-      uint32_t block_width = typeSize * simdWidth;
-      uint32_t blocksize = (block_width - 1) % 32 | (vec_size - 1) << 16;
-
+      uint32_t block_width = 0;
+      uint32_t blocksize = fixBlockSize(insn, typeSize, simdWidth, 
+ block_width);
 
       vector<GenRegister> valuesVec;
       vector<GenRegister> tmpVec;
       for (uint32_t i = 0; i < vec_size; ++i) {
         valuesVec.push_back(sel.selReg(insn.getDst(i), type));
-        if(simdWidth == 16 && typeSize == 4)
+        if((simdWidth == 16 && typeSize == 4) || typeSize == 1)
           tmpVec.push_back(GenRegister::ud8grf(sel.reg(FAMILY_REG)));
       }
       const GenRegister coordx = GenRegister::toUniform(sel.selReg(insn.getSrc(0), TYPE_U32), GEN_TYPE_UD); @@ -7855,15 +7886,23 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
         sel.MOV(blocksizereg, GenRegister::immud(blocksize));
       sel.pop();
 
-      if (simdWidth * typeSize < 64) {
+      if (block_width < 64) {
         sel.push();
           sel.curr.execWidth = 8;
           sel.curr.predicate = GEN_PREDICATE_NONE;
           sel.curr.noMask = 1;
           // Now read the data
-          sel.MBREAD(&valuesVec[0], vec_size, header, insn.getImageIndex(), response_size);
+          if(typeSize == 1) {
+            sel.MBREAD(&tmpVec[0], vec_size, header, insn.getImageIndex(), response_size);
+            for (uint32_t i = 0; i < vec_size; i++) {
+              sel.MOV(valuesVec[i], sel.getOffsetReg(GenRegister::retype(tmpVec[0], GEN_TYPE_UB), 0, i*simdWidth));
+              sel.MOV(sel.getOffsetReg(valuesVec[i], 0, 16), sel.getOffsetReg(GenRegister::retype(tmpVec[0], GEN_TYPE_UB), 0, i*simdWidth + 8));
+            }
+          }else
+            sel.MBREAD(&valuesVec[0], vec_size, header, 
+ insn.getImageIndex(), response_size);
+
         sel.pop();
-      } else if (simdWidth * typeSize == 64) {
+      } else if (block_width == 64) {
         sel.push();
           sel.curr.execWidth = 8;
           sel.curr.predicate = GEN_PREDICATE_NONE; diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index f0c3957..4b87e4a 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1070,7 +1070,7 @@ namespace ir {
       public TupleDstPolicy<MediaBlockReadInstruction>
     {
     public:
-      INLINE MediaBlockReadInstruction(uint8_t imageIdx, Tuple dst, uint8_t vec_size, Tuple srcTuple, uint8_t srcNum, Type type) {
+      INLINE MediaBlockReadInstruction(uint8_t imageIdx, Tuple dst, 
+ uint8_t vec_size, Tuple srcTuple, uint8_t srcNum, Type type, uint8_t 
+ width = 0, uint8_t height = 0) {
         this->opcode = OP_MBREAD;
         this->dst = dst;
         this->dstNum = vec_size;
@@ -1078,6 +1078,8 @@ namespace ir {
         this->srcNum = srcNum;
         this->imageIdx = imageIdx;
         this->type = type;
+        this->width = width;
+        this->height = height;
       }
       INLINE bool wellFormed(const Function &fn, std::string &why) const;
       INLINE void out(std::ostream &out, const Function &fn) const { @@ -1095,6 +1097,8 @@ namespace ir {
       INLINE uint8_t getImageIndex(void) const { return this->imageIdx; }
       INLINE uint8_t getVectorSize(void) const { return this->dstNum; }
       INLINE Type getType(void) const { return this->type; }
+      INLINE uint8_t getWidth(void) const { return this->width; }
+      INLINE uint8_t getHeight(void) const { return this->height; }
 
       Tuple src;
       Tuple dst;
@@ -1102,6 +1106,8 @@ namespace ir {
       uint8_t srcNum;
       uint8_t dstNum;
       Type type;
+      uint8_t width;
+      uint8_t height;
     };
 
     class ALIGNED_INSTRUCTION MediaBlockWriteInstruction :
@@ -2409,6 +2415,8 @@ DECL_MEM_FN(PrintfInstruction, Type, getType(const Function& fn, uint32_t ID), g  DECL_MEM_FN(MediaBlockReadInstruction, uint8_t, getImageIndex(void), getImageIndex())  DECL_MEM_FN(MediaBlockReadInstruction, uint8_t, getVectorSize(void), getVectorSize())  DECL_MEM_FN(MediaBlockReadInstruction, Type, getType(void), getType())
+DECL_MEM_FN(MediaBlockReadInstruction, uint8_t, getWidth(void), 
+getWidth()) DECL_MEM_FN(MediaBlockReadInstruction, uint8_t, 
+getHeight(void), getHeight())
 DECL_MEM_FN(MediaBlockWriteInstruction, uint8_t, getImageIndex(void), getImageIndex())  DECL_MEM_FN(MediaBlockWriteInstruction, uint8_t, getVectorSize(void), getVectorSize())  DECL_MEM_FN(MediaBlockWriteInstruction, Type, getType(void), getType())
@@ -2720,8 +2728,8 @@ DECL_MEM_FN(MemInstruction, void,     setBtiReg(Register reg), setBtiReg(reg))
     return internal::PrintfInstruction(dst, srcTuple, typeTuple, srcNum, bti, num).convert();
   }
 
-  Instruction MBREAD(uint8_t imageIndex, Tuple dst, uint8_t vec_size, Tuple coord, uint8_t srcNum, Type type) {
-    return internal::MediaBlockReadInstruction(imageIndex, dst, vec_size, coord, srcNum, type).convert();
+  Instruction MBREAD(uint8_t imageIndex, Tuple dst, uint8_t vec_size, Tuple coord, uint8_t srcNum, Type type, uint8_t width, uint8_t height) {
+    return internal::MediaBlockReadInstruction(imageIndex, dst, 
+ vec_size, coord, srcNum, type, width, height).convert();
   }
 
   Instruction MBWRITE(uint8_t imageIndex, Tuple srcTuple, uint8_t srcNum, uint8_t vec_size, Type type) { diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 16c2045..7e90576 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -646,6 +646,8 @@ namespace ir {
     uint8_t getImageIndex() const;
     uint8_t getVectorSize() const;
     Type getType(void) const;
+    uint8_t getWidth() const;
+    uint8_t getHeight() const;
   };
 
   /*! Media Block Write.  */
@@ -893,7 +895,7 @@ namespace ir {
   /*! printf */
   Instruction PRINTF(Register dst, Tuple srcTuple, Tuple typeTuple, uint8_t srcNum, uint8_t bti, uint16_t num);
   /*! media block read */
-  Instruction MBREAD(uint8_t imageIndex, Tuple dst, uint8_t vec_size, Tuple coord, uint8_t srcNum, Type type);
+  Instruction MBREAD(uint8_t imageIndex, Tuple dst, uint8_t vec_size, 
+ Tuple coord, uint8_t srcNum, Type type, uint8_t width, uint8_t 
+ height);
   /*! media block write */
   Instruction MBWRITE(uint8_t imageIndex, Tuple srcTuple, uint8_t srcNum, uint8_t vec_size, Type type);  } /* namespace ir */ diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
index 97e33fe..55bf6f0 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
@@ -262,41 +262,61 @@ OVERLOADABLE void intel_sub_group_block_write_ui8(global uint* p,uint8 data)
   __gen_ocl_sub_group_block_write_ui_mem8(p, data);  }
 
-PURE CONST uint __gen_ocl_sub_group_block_read_ui_image(image2d_t p, int x, int y); -PURE CONST uint2 __gen_ocl_sub_group_block_read_ui_image2(image2d_t p, int x, int y); -PURE CONST uint4 __gen_ocl_sub_group_block_read_ui_image4(image2d_t p, int x, int y); -PURE CONST uint8 __gen_ocl_sub_group_block_read_ui_image8(image2d_t p, int x, int y);
+PURE CONST uint __gen_ocl_sub_group_block_read_ui_image(image2d_t p, 
+int x, int y, int w, int h); PURE CONST uint2 
+__gen_ocl_sub_group_block_read_ui_image2(image2d_t p, int x, int y, int 
+w, int h); PURE CONST uint4 
+__gen_ocl_sub_group_block_read_ui_image4(image2d_t p, int x, int y, int 
+w, int h); PURE CONST uint8 
+__gen_ocl_sub_group_block_read_ui_image8(image2d_t p, int x, int y, int 
+w, int h);
 OVERLOADABLE uint intel_sub_group_block_read(image2d_t p, int2 cord)  {
-  return __gen_ocl_sub_group_block_read_ui_image(p, cord.x, cord.y);
+  return __gen_ocl_sub_group_block_read_ui_image(p, cord.x, cord.y, 0, 
+ 0);
 }
 OVERLOADABLE uint2 intel_sub_group_block_read2(image2d_t p, int2 cord)  {
-  return __gen_ocl_sub_group_block_read_ui_image2(p, cord.x, cord.y);
+  return __gen_ocl_sub_group_block_read_ui_image2(p, cord.x, cord.y, 0, 
+ 0);
 }
 OVERLOADABLE uint4 intel_sub_group_block_read4(image2d_t p, int2 cord)  {
-  return __gen_ocl_sub_group_block_read_ui_image4(p, cord.x, cord.y);
+  return __gen_ocl_sub_group_block_read_ui_image4(p, cord.x, cord.y, 0, 
+ 0);
 }
 OVERLOADABLE uint8 intel_sub_group_block_read8(image2d_t p, int2 cord)  {
-  return __gen_ocl_sub_group_block_read_ui_image8(p, cord.x, cord.y);
+  return __gen_ocl_sub_group_block_read_ui_image8(p, cord.x, cord.y, 0, 
+ 0);
 }
 OVERLOADABLE uint intel_sub_group_block_read_ui(image2d_t p, int2 cord)  {
-  return __gen_ocl_sub_group_block_read_ui_image(p, cord.x, cord.y);
+  return __gen_ocl_sub_group_block_read_ui_image(p, cord.x, cord.y, 0, 
+ 0);
 }
 OVERLOADABLE uint2 intel_sub_group_block_read_ui2(image2d_t p, int2 cord)  {
-  return __gen_ocl_sub_group_block_read_ui_image2(p, cord.x, cord.y);
+  return __gen_ocl_sub_group_block_read_ui_image2(p, cord.x, cord.y, 0, 
+ 0);
 }
 OVERLOADABLE uint4 intel_sub_group_block_read_ui4(image2d_t p, int2 cord)  {
-  return __gen_ocl_sub_group_block_read_ui_image4(p, cord.x, cord.y);
+  return __gen_ocl_sub_group_block_read_ui_image4(p, cord.x, cord.y, 0, 
+ 0);
 }
 OVERLOADABLE uint8 intel_sub_group_block_read_ui8(image2d_t p, int2 cord)  {
-  return __gen_ocl_sub_group_block_read_ui_image8(p, cord.x, cord.y);
+  return __gen_ocl_sub_group_block_read_ui_image8(p, cord.x, cord.y, 0, 
+0); }
+
+OVERLOADABLE uint intel_sub_group_media_block_read_ui(int2 
+src_byte_offset, int width, int height, read_only image2d_t image) {
+  return __gen_ocl_sub_group_block_read_ui_image(image, 
+src_byte_offset.x, src_byte_offset.y, width, height); }
+
+OVERLOADABLE uint2 intel_sub_group_media_block_read_ui2(int2 
+src_byte_offset, int width, int height, read_only image2d_t image) {
+  return __gen_ocl_sub_group_block_read_ui_image2(image, 
+src_byte_offset.x, src_byte_offset.y, width, height); }
+
+OVERLOADABLE uint4 intel_sub_group_media_block_read_ui4(int2 
+src_byte_offset, int width, int height, read_only image2d_t image) {
+  return __gen_ocl_sub_group_block_read_ui_image4(image, 
+src_byte_offset.x, src_byte_offset.y, width, height); }
+
+OVERLOADABLE uint8 intel_sub_group_media_block_read_ui8(int2 
+src_byte_offset, int width, int height, read_only image2d_t image) {
+  return __gen_ocl_sub_group_block_read_ui_image8(image, 
+src_byte_offset.x, src_byte_offset.y, width, height);
 }
 
 void __gen_ocl_sub_group_block_write_ui_image(image2d_t p, int x, int y, uint data); @@ -378,25 +398,51 @@ OVERLOADABLE void intel_sub_group_block_write_us8(global ushort* p,ushort8 data)
   __gen_ocl_sub_group_block_write_us_mem8(p, data);  }
 
-PURE CONST ushort __gen_ocl_sub_group_block_read_us_image(image2d_t p, int x, int y); -PURE CONST ushort2 __gen_ocl_sub_group_block_read_us_image2(image2d_t p, int x, int y); -PURE CONST ushort4 __gen_ocl_sub_group_block_read_us_image4(image2d_t p, int x, int y); -PURE CONST ushort8 __gen_ocl_sub_group_block_read_us_image8(image2d_t p, int x, int y);
+PURE CONST ushort __gen_ocl_sub_group_block_read_us_image(image2d_t p, 
+int x, int y, int w, int h); PURE CONST ushort2 
+__gen_ocl_sub_group_block_read_us_image2(image2d_t p, int x, int y, int 
+w, int h); PURE CONST ushort4 
+__gen_ocl_sub_group_block_read_us_image4(image2d_t p, int x, int y, int 
+w, int h); PURE CONST ushort8 
+__gen_ocl_sub_group_block_read_us_image8(image2d_t p, int x, int y, int 
+w, int h); PURE CONST ushort16 
+__gen_ocl_sub_group_block_read_us_image16(image2d_t p, int x, int y, 
+int w, int h);
 OVERLOADABLE ushort intel_sub_group_block_read_us(image2d_t p, int2 cord)  {
-  return __gen_ocl_sub_group_block_read_us_image(p, cord.x, cord.y);
+  return __gen_ocl_sub_group_block_read_us_image(p, cord.x, cord.y, 0, 
+ 0);
 }
 OVERLOADABLE ushort2 intel_sub_group_block_read_us2(image2d_t p, int2 cord)  {
-  return __gen_ocl_sub_group_block_read_us_image2(p, cord.x, cord.y);
+  return __gen_ocl_sub_group_block_read_us_image2(p, cord.x, cord.y, 0, 
+ 0);
 }
 OVERLOADABLE ushort4 intel_sub_group_block_read_us4(image2d_t p, int2 cord)  {
-  return __gen_ocl_sub_group_block_read_us_image4(p, cord.x, cord.y);
+  return __gen_ocl_sub_group_block_read_us_image4(p, cord.x, cord.y, 0, 
+ 0);
 }
 OVERLOADABLE ushort8 intel_sub_group_block_read_us8(image2d_t p, int2 cord)  {
-  return __gen_ocl_sub_group_block_read_us_image8(p, cord.x, cord.y);
+  return __gen_ocl_sub_group_block_read_us_image8(p, cord.x, cord.y, 0, 
+0); }
+
+OVERLOADABLE ushort intel_sub_group_media_block_read_us(int2 
+src_byte_offset, int width, int height, read_only image2d_t image) {
+  return __gen_ocl_sub_group_block_read_us_image(image, 
+src_byte_offset.x, src_byte_offset.y, width, height); }
+
+OVERLOADABLE ushort2 intel_sub_group_media_block_read_us2(int2 
+src_byte_offset, int width, int height, read_only image2d_t image) {
+  return __gen_ocl_sub_group_block_read_us_image2(image, 
+src_byte_offset.x, src_byte_offset.y, width, height); }
+
+OVERLOADABLE ushort4 intel_sub_group_media_block_read_us4(int2 
+src_byte_offset, int width, int height, read_only image2d_t image) {
+  return __gen_ocl_sub_group_block_read_us_image4(image, 
+src_byte_offset.x, src_byte_offset.y, width, height); }
+
+OVERLOADABLE ushort8 intel_sub_group_media_block_read_us8(int2 
+src_byte_offset, int width, int height, read_only image2d_t image) {
+  return __gen_ocl_sub_group_block_read_us_image8(image, 
+src_byte_offset.x, src_byte_offset.y, width, height); }
+
+OVERLOADABLE ushort16 intel_sub_group_media_block_read_us16(int2 
+src_byte_offset, int width, int height, read_only image2d_t image) {
+  return __gen_ocl_sub_group_block_read_us_image16(image, 
+src_byte_offset.x, src_byte_offset.y, width, height);
 }
 
 void __gen_ocl_sub_group_block_write_us_image(image2d_t p, int x, int y, ushort data); @@ -419,6 +465,37 @@ OVERLOADABLE void intel_sub_group_block_write_us8(image2d_t p, int2 cord, ushort  {
   __gen_ocl_sub_group_block_write_us_image8(p, cord.x, cord.y, data);  }
+
+PURE CONST uchar __gen_ocl_sub_group_block_read_uc_image(image2d_t p, 
+int x, int y, int w, int h); PURE CONST uchar2 
+__gen_ocl_sub_group_block_read_uc_image2(image2d_t p, int x, int y, int 
+w, int h); PURE CONST uchar4 
+__gen_ocl_sub_group_block_read_uc_image4(image2d_t p, int x, int y, int 
+w, int h); PURE CONST uchar8 
+__gen_ocl_sub_group_block_read_uc_image8(image2d_t p, int x, int y, int 
+w, int h); PURE CONST uchar16 
+__gen_ocl_sub_group_block_read_uc_image16(image2d_t p, int x, int y, 
+int w, int h); OVERLOADABLE uchar 
+intel_sub_group_media_block_read_uc(int2 src_byte_offset, int width, 
+int height, read_only image2d_t image) {
+  return __gen_ocl_sub_group_block_read_uc_image(image, 
+src_byte_offset.x, src_byte_offset.y, width, height); }
+
+OVERLOADABLE uchar2 intel_sub_group_media_block_read_uc2(int2 
+src_byte_offset, int width, int height, read_only image2d_t image) {
+  return __gen_ocl_sub_group_block_read_uc_image2(image, 
+src_byte_offset.x, src_byte_offset.y, width, height); }
+
+OVERLOADABLE uchar4 intel_sub_group_media_block_read_uc4(int2 
+src_byte_offset, int width, int height, read_only image2d_t image) {
+  return __gen_ocl_sub_group_block_read_uc_image4(image, 
+src_byte_offset.x, src_byte_offset.y, width, height); }
+
+OVERLOADABLE uchar8 intel_sub_group_media_block_read_uc8(int2 
+src_byte_offset, int width, int height, read_only image2d_t image) {
+  return __gen_ocl_sub_group_block_read_uc_image8(image, 
+src_byte_offset.x, src_byte_offset.y, width, height); }
+
+OVERLOADABLE uchar16 intel_sub_group_media_block_read_uc16(int2 
+src_byte_offset, int width, int height, read_only image2d_t image) {
+  return __gen_ocl_sub_group_block_read_uc_image16(image, 
+src_byte_offset.x, src_byte_offset.y, width, height); }
+
 #define SHUFFLE_DOWN(TYPE) \
 OVERLOADABLE TYPE intel_sub_group_shuffle_down(TYPE x, TYPE y, uint c) { \
   TYPE res0, res1; \
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index 608551b..2592d10 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -250,3 +250,20 @@ OVERLOADABLE void intel_sub_group_block_write_us(image2d_t image, int2 byte_coor  OVERLOADABLE void intel_sub_group_block_write_us2(image2d_t image, int2 byte_coord, ushort2 data);  OVERLOADABLE void intel_sub_group_block_write_us4(image2d_t image, int2 byte_coord, ushort4 data);  OVERLOADABLE void intel_sub_group_block_write_us8(image2d_t image, int2 byte_coord, ushort8 data);
+
+OVERLOADABLE uchar intel_sub_group_media_block_read_uc(int2 
+src_byte_offset, int width, int height, read_only image2d_t image); 
+OVERLOADABLE uchar2 intel_sub_group_media_block_read_uc2(int2 
+src_byte_offset, int width, int height, read_only image2d_t image); 
+OVERLOADABLE uchar4 intel_sub_group_media_block_read_uc4(int2 
+src_byte_offset, int width, int height, read_only image2d_t image); 
+OVERLOADABLE uchar8 intel_sub_group_media_block_read_uc8(int2 
+src_byte_offset, int width, int height, read_only image2d_t image); 
+OVERLOADABLE uchar16 intel_sub_group_media_block_read_uc16(int2 
+src_byte_offset, int width, int height, read_only image2d_t image);
+
+OVERLOADABLE ushort intel_sub_group_media_block_read_us(int2 
+src_byte_offset, int width, int height, read_only image2d_t image); 
+OVERLOADABLE ushort2 intel_sub_group_media_block_read_us2(int2 
+src_byte_offset, int width, int height, read_only image2d_t image); 
+OVERLOADABLE ushort4 intel_sub_group_media_block_read_us4(int2 
+src_byte_offset, int width, int height, read_only image2d_t image); 
+OVERLOADABLE ushort8 intel_sub_group_media_block_read_us8(int2 
+src_byte_offset, int width, int height, read_only image2d_t image); 
+OVERLOADABLE ushort16 intel_sub_group_media_block_read_us16(int2 
+src_byte_offset, int width, int height, read_only image2d_t image);
+
+OVERLOADABLE uint intel_sub_group_media_block_read_ui(int2 
+src_byte_offset, int width, int height, read_only image2d_t image); 
+OVERLOADABLE uint2 intel_sub_group_media_block_read_ui2(int2 
+src_byte_offset, int width, int height, read_only image2d_t image); 
+OVERLOADABLE uint4 intel_sub_group_media_block_read_ui4(int2 
+src_byte_offset, int width, int height, read_only image2d_t image); 
+OVERLOADABLE uint8 intel_sub_group_media_block_read_ui8(int2 
+src_byte_offset, int width, int height, read_only image2d_t image);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 3fefa92..faa9c37 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -737,6 +737,7 @@ namespace gbe
     // Emit subgroup instructions
     void emitBlockReadWriteMemInst(CallInst &I, CallSite &CS, bool isWrite, uint8_t vec_size, ir::Type = ir::TYPE_U32);
     void emitBlockReadWriteImageInst(CallInst &I, CallSite &CS, bool isWrite, uint8_t vec_size, ir::Type = ir::TYPE_U32);
+    void checkMediaBlockWidthandHeight(CallInst &I, uint8_t width, 
+ uint8_t height, uint8_t vec_size, ir::Type type);
 
     uint8_t appendSampler(CallSite::arg_iterator AI);
     uint8_t getImageID(CallInst &I);
@@ -4059,6 +4060,12 @@ namespace gbe
       case GEN_OCL_SUB_GROUP_BLOCK_READ_US_IMAGE2:
       case GEN_OCL_SUB_GROUP_BLOCK_READ_US_IMAGE4:
       case GEN_OCL_SUB_GROUP_BLOCK_READ_US_IMAGE8:
+      case GEN_OCL_SUB_GROUP_BLOCK_READ_US_IMAGE16:
+      case GEN_OCL_SUB_GROUP_BLOCK_READ_UC_IMAGE:
+      case GEN_OCL_SUB_GROUP_BLOCK_READ_UC_IMAGE2:
+      case GEN_OCL_SUB_GROUP_BLOCK_READ_UC_IMAGE4:
+      case GEN_OCL_SUB_GROUP_BLOCK_READ_UC_IMAGE8:
+      case GEN_OCL_SUB_GROUP_BLOCK_READ_UC_IMAGE16:
       case GEN_OCL_ENQUEUE_SET_NDRANGE_INFO:
       case GEN_OCL_ENQUEUE_GET_NDRANGE_INFO:
         this->newRegister(&I);
@@ -4463,6 +4470,61 @@ namespace gbe
     GBE_ASSERT(AI == AE);
   }
 
+  void GenWriter::checkMediaBlockWidthandHeight(CallInst& I, uint8_t width, uint8_t height, uint8_t vec_size, ir::Type type) {
+    if (width == 0) {
+      has_errors = true;
+      Func->getContext().emitError(&I,"Media Block width value illegal, width is:" + width);
+      ctx.getUnit().setValid(false);
+      return;
+    }
+    if (height == 0) {
+      has_errors = true;
+      Func->getContext().emitError(&I,"Media Block height value illegal, height is:" + height);
+      ctx.getUnit().setValid(false);
+      return;
+    }
+    uint32_t typeSize;
+    if (type == ir::TYPE_U8)
+      typeSize = 1;
+    else if (type == ir::TYPE_U16)
+      typeSize = 2;
+    else
+      typeSize = 4;
+
+    uint32_t widthBytes = width * typeSize;
+
+    uint32_t maxRows;
+    if (widthBytes <= 4)
+      maxRows = 64;
+    else if (widthBytes <= 8)
+      maxRows = 32;
+    else if (widthBytes <= 16)
+      maxRows = 16;
+    else
+      maxRows = 8;
+
+    if (widthBytes % 4 != 0) {
+      has_errors = true;
+      Func->getContext().emitError(&I,"Media Block widthBytes value illegal, widthBytes is:" + widthBytes);
+      ctx.getUnit().setValid(false);
+      return;
+    }
+
+    if ((typeSize == 4 && widthBytes > 64) || (typeSize != 4 && widthBytes > 32)) {
+      has_errors = true;
+      Func->getContext().emitError(&I,"Media Block widthBytes value illegal, widthBytes is:" + widthBytes);
+      ctx.getUnit().setValid(false);
+      return;
+    }
+
+    if (height > maxRows) {
+      has_errors = true;
+      Func->getContext().emitError(&I,"Media Block height value illegal, height is larger than: "  + maxRows);
+      ctx.getUnit().setValid(false);
+      return;
+    }
+  }
+
   void GenWriter::emitBlockReadWriteImageInst(CallInst &I, CallSite &CS, bool isWrite, uint8_t vec_size, ir::Type type) {
     CallSite::arg_iterator AI = CS.arg_begin();
     CallSite::arg_iterator AE = CS.arg_end(); @@ -4489,7 +4551,20 @@ namespace gbe
         dstTupleData.push_back(getRegister(&I, i));
       const ir::Tuple srctuple = ctx.arrayTuple(src, 2);
       const ir::Tuple dsttuple = ctx.arrayTuple(&dstTupleData[0], vec_size);
-      ctx.MBREAD(imageID, dsttuple, vec_size, srctuple, 2, type);
+      Constant *CWidth = dyn_cast<Constant>(*AI++);
+      GBE_ASSERT(CWidth != NULL);
+      const ir::Immediate &width = processConstantImm(CWidth);
+      Constant *CHeight = dyn_cast<Constant>(*AI++);
+      GBE_ASSERT(CHeight != NULL);
+      const ir::Immediate &height = processConstantImm(CHeight);
+      // check width and height legality.
+      if (width.getIntegerValue() != 0 || height.getIntegerValue() != 0) {
+        checkMediaBlockWidthandHeight(I, width.getIntegerValue(), height.getIntegerValue(), vec_size, type);
+        if(!ctx.getUnit().getValid())
+          return;
+      }
+      //map w * h region to simd_size
+      ctx.MBREAD(imageID, dsttuple, vec_size, srctuple, 2, type, 
+ width.getIntegerValue(), height.getIntegerValue());
     }
 
     GBE_ASSERT(AI == AE);
@@ -5473,6 +5548,18 @@ namespace gbe
             this->emitBlockReadWriteImageInst(I, CS, false, 4, ir::TYPE_U16); break;
           case GEN_OCL_SUB_GROUP_BLOCK_READ_US_IMAGE8:
             this->emitBlockReadWriteImageInst(I, CS, false, 8, ir::TYPE_U16); break;
+          case GEN_OCL_SUB_GROUP_BLOCK_READ_US_IMAGE16:
+            this->emitBlockReadWriteImageInst(I, CS, false, 16, ir::TYPE_U16); break;
+          case GEN_OCL_SUB_GROUP_BLOCK_READ_UC_IMAGE:
+            this->emitBlockReadWriteImageInst(I, CS, false, 1, ir::TYPE_U8); break;
+          case GEN_OCL_SUB_GROUP_BLOCK_READ_UC_IMAGE2:
+            this->emitBlockReadWriteImageInst(I, CS, false, 2, ir::TYPE_U8); break;
+          case GEN_OCL_SUB_GROUP_BLOCK_READ_UC_IMAGE4:
+            this->emitBlockReadWriteImageInst(I, CS, false, 4, ir::TYPE_U8); break;
+          case GEN_OCL_SUB_GROUP_BLOCK_READ_UC_IMAGE8:
+            this->emitBlockReadWriteImageInst(I, CS, false, 8, ir::TYPE_U8); break;
+          case GEN_OCL_SUB_GROUP_BLOCK_READ_UC_IMAGE16:
+            this->emitBlockReadWriteImageInst(I, CS, false, 16, 
+ ir::TYPE_U8); break;
           case GEN_OCL_SUB_GROUP_BLOCK_WRITE_US_IMAGE:
             this->emitBlockReadWriteImageInst(I, CS, true, 1, ir::TYPE_U16); break;
           case GEN_OCL_SUB_GROUP_BLOCK_WRITE_US_IMAGE2:
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 86485da..0243f05 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -250,10 +250,16 @@ DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_US_IMAGE, __gen_ocl_sub_group_block_  DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_US_IMAGE2, __gen_ocl_sub_group_block_read_us_image2)
 DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_US_IMAGE4, __gen_ocl_sub_group_block_read_us_image4)
 DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_US_IMAGE8, __gen_ocl_sub_group_block_read_us_image8)
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_US_IMAGE16, 
+__gen_ocl_sub_group_block_read_us_image16)
 DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_US_IMAGE, __gen_ocl_sub_group_block_write_us_image)
 DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_US_IMAGE2, __gen_ocl_sub_group_block_write_us_image2)
 DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_US_IMAGE4, __gen_ocl_sub_group_block_write_us_image4)
 DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_US_IMAGE8, __gen_ocl_sub_group_block_write_us_image8)
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_UC_IMAGE, 
+__gen_ocl_sub_group_block_read_uc_image)
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_UC_IMAGE2, 
+__gen_ocl_sub_group_block_read_uc_image2)
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_UC_IMAGE4, 
+__gen_ocl_sub_group_block_read_uc_image4)
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_UC_IMAGE8, 
+__gen_ocl_sub_group_block_read_uc_image8)
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_UC_IMAGE16, 
+__gen_ocl_sub_group_block_read_uc_image16)
 // common function
 DECL_LLVM_GEN_FUNCTION(LRP, __gen_ocl_lrp)
 
diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
index 8850abb..c413ab4 100644
--- a/backend/src/llvm/llvm_scalarize.cpp
+++ b/backend/src/llvm/llvm_scalarize.cpp
@@ -723,6 +723,11 @@ namespace gbe {
           case GEN_OCL_SUB_GROUP_BLOCK_READ_US_IMAGE2:
           case GEN_OCL_SUB_GROUP_BLOCK_READ_US_IMAGE4:
           case GEN_OCL_SUB_GROUP_BLOCK_READ_US_IMAGE8:
+          case GEN_OCL_SUB_GROUP_BLOCK_READ_US_IMAGE16:
+          case GEN_OCL_SUB_GROUP_BLOCK_READ_UC_IMAGE2:
+          case GEN_OCL_SUB_GROUP_BLOCK_READ_UC_IMAGE4:
+          case GEN_OCL_SUB_GROUP_BLOCK_READ_UC_IMAGE8:
+          case GEN_OCL_SUB_GROUP_BLOCK_READ_UC_IMAGE16:
             setAppendPoint(call);
             extractFromVector(call);
             break;
--
2.5.0

_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list